Index: llvm/trunk/lib/Target/X86/X86FastISel.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp
@@ -367,6 +367,10 @@
   switch (VT.getSimpleVT().SimpleTy) {
   default: return false;
   case MVT::i1:
+    // TODO: Support this properly.
+    if (Subtarget->hasAVX512())
+      return false;
+    LLVM_FALLTHROUGH;
   case MVT::i8:
     Opc = X86::MOV8rm;
     RC  = &X86::GR8RegClass;
@@ -540,11 +544,12 @@
       // In case ValReg is a K register, COPY to a GPR
       if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
         unsigned KValReg = ValReg;
-        ValReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
-                                                      : &X86::GR8_ABCD_LRegClass);
+        ValReg = createResultReg(&X86::GR32RegClass);
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(TargetOpcode::COPY), ValReg)
           .addReg(KValReg);
+        ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
+                                            X86::sub_8bit);
       }
       // Mask out all but lowest bit.
       unsigned AndResult = createResultReg(&X86::GR8RegClass);
@@ -1280,11 +1285,12 @@
     // In case SrcReg is a K register, COPY to a GPR
     if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
       unsigned KSrcReg = SrcReg;
-      SrcReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
-                                                    : &X86::GR8_ABCD_LRegClass);
+      SrcReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), SrcReg)
        .addReg(KSrcReg);
+      SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+                                          X86::sub_8bit);
     }
     SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
     SrcVT = MVT::i8;
@@ -1580,11 +1586,12 @@
   // In case ResultReg is a K register, COPY to a GPR
   if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
     unsigned KResultReg = ResultReg;
-    ResultReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
-                                                     : &X86::GR8_ABCD_LRegClass);
+    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(KResultReg);
+    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+                                           X86::sub_8bit);
   }

   // Set the high bits to zero.
@@ -1768,11 +1775,12 @@
     // In case OpReg is a K register, COPY to a GPR
     if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
       unsigned KOpReg = OpReg;
-      OpReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
-                                                   : &X86::GR8_ABCD_LRegClass);
+      OpReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), OpReg)
       .addReg(KOpReg);
+      OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+                                         X86::sub_8bit);
     }
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
         .addReg(OpReg)
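The hunks above, and the ones that follow, all apply the same substitution: instead of copying a VK1 mask register directly into an 8-bit GPR class (GR8, or GR8_ABCD_L in 32-bit mode), the mask is first copied into a full 32-bit GPR and the low byte is then taken as a subregister. A minimal sketch of the shared pattern, written as a hypothetical helper — copyK1ToGR8 is not part of this patch; it only restates what each hunk open-codes:

    // Hypothetical helper condensing the repeated FastISel pattern above:
    // VK1 -> GR32 via COPY, then narrow to the low 8 bits.
    unsigned X86FastISel::copyK1ToGR8(unsigned KReg) {
      // KMOVW/KMOVD always transfer a full 32-bit GPR, so GR32 is the
      // natural destination class for the cross-class copy.
      unsigned Reg32 = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), Reg32)
          .addReg(KReg);
      // Extract the low byte explicitly instead of constraining the copy
      // to an 8-bit register class up front.
      return fastEmitInst_extractsubreg(MVT::i8, Reg32, /*Kill=*/true,
                                        X86::sub_8bit);
    }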
@@ -2113,11 +2121,12 @@
       // In case OpReg is a K register, COPY to a GPR
       if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
         unsigned KCondReg = CondReg;
-        CondReg = createResultReg(Subtarget->is64Bit() ?
-                                  &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass);
+        CondReg = createResultReg(&X86::GR32RegClass);
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(TargetOpcode::COPY), CondReg)
           .addReg(KCondReg, getKillRegState(CondIsKill));
+        CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+                                             X86::sub_8bit);
       }
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
           .addReg(CondReg, getKillRegState(CondIsKill))
@@ -2327,11 +2336,12 @@
       // In case OpReg is a K register, COPY to a GPR
       if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
         unsigned KCondReg = CondReg;
-        CondReg = createResultReg(Subtarget->is64Bit() ?
-                                  &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass);
+        CondReg = createResultReg(&X86::GR32RegClass);
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(TargetOpcode::COPY), CondReg)
           .addReg(KCondReg, getKillRegState(CondIsKill));
+        CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+                                             X86::sub_8bit);
       }
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
           .addReg(CondReg, getKillRegState(CondIsKill))
@@ -3307,6 +3317,16 @@
       // Handle zero-extension from i1 to i8, which is common.
       if (ArgVT == MVT::i1) {
+        // In case SrcReg is a K register, COPY to a GPR
+        if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
+          unsigned KArgReg = ArgReg;
+          ArgReg = createResultReg(&X86::GR32RegClass);
+          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(TargetOpcode::COPY), ArgReg)
+            .addReg(KArgReg);
+          ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
+                                              X86::sub_8bit);
+        }
         // Set the high bits to zero.
         ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
         ArgVT = MVT::i8;
@@ -3642,6 +3662,13 @@
   switch (VT.SimpleTy) {
   default: llvm_unreachable("Unexpected value type");
   case MVT::i1:
+    if (Subtarget->hasAVX512()) {
+      // Need to copy to a VK1 register.
+      unsigned ResultReg = createResultReg(&X86::VK1RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
+      return ResultReg;
+    }
   case MVT::i8:
     return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
                                       X86::sub_8bit);
@@ -3663,7 +3690,12 @@
   unsigned Opc = 0;
   switch (VT.SimpleTy) {
   default: llvm_unreachable("Unexpected value type");
-  case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
+  case MVT::i1:
+    // TODO: Support this properly.
+    if (Subtarget->hasAVX512())
+      return 0;
+    VT = MVT::i8;
+    LLVM_FALLTHROUGH;
   case MVT::i8: Opc = X86::MOV8ri; break;
   case MVT::i16: Opc = X86::MOV16ri; break;
   case MVT::i32: Opc = X86::MOV32ri; break;
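With AVX-512 enabled, FastISel now keeps i1 values in VK1 mask registers: the truncate path above copies the GPR result into a VK1 register, while i1 constant materialization simply bails out (returns 0) so the default lowering handles it. A tiny C++ function that exercises these i1 paths when built at -O0 for an AVX-512 target — illustrative only; the exact flags are an assumption (e.g. clang -O0 -march=skylake-avx512):

    // The compare yields an i1 that lives in a k-register under AVX-512;
    // returning it as a bool forces the k-to-GPR copy sequence patched in
    // above (COPY to GR32, extract sub_8bit, zero-extend).
    bool isNegative(float x) {
      return x < 0.0f;
    }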
Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -2183,28 +2183,26 @@
 // GR from/to mask register
 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
-          (COPY_TO_REGCLASS GR16:$src, VK16)>;
+          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
-          (COPY_TO_REGCLASS VK16:$src, GR16)>;
+          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
-          (COPY_TO_REGCLASS GR8:$src, VK8)>;
+          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
-          (COPY_TO_REGCLASS VK8:$src, GR8)>;
+          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
           (KMOVWrk VK16:$src)>;
 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
-          (i32 (INSERT_SUBREG (IMPLICIT_DEF),
-                (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>;
+          (COPY_TO_REGCLASS VK16:$src, GR32)>;

 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
-          (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>;
+          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
-          (i32 (INSERT_SUBREG (IMPLICIT_DEF),
-                (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>;
+          (COPY_TO_REGCLASS VK8:$src, GR32)>;

 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
           (COPY_TO_REGCLASS GR32:$src, VK32)>;
@@ -3288,6 +3286,23 @@

 }

+multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
+                                               AVX512VLVectorVTInfo _,
+                                               dag Mask, RegisterClass MaskRC,
+                                               SubRegIndex subreg> {
+
+def : Pat<(masked_store addr:$dst, Mask,
+             (_.info512.VT (insert_subvector undef,
+                               (_.info256.VT (insert_subvector undef,
+                                                 (_.info128.VT _.info128.RC:$src),
+                                                 (iPTR 0))),
+                               (iPTR 0)))),
+          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
+                      (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+
+}
+
 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

@@ -3314,22 +3329,50 @@

 }

+multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
+                                              AVX512VLVectorVTInfo _,
+                                              dag Mask, RegisterClass MaskRC,
+                                              SubRegIndex subreg> {
+
+def : Pat<(_.info128.VT (extract_subvector
+                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
+                                        (_.info512.VT (bitconvert
+                                                       (v16i32 immAllZerosV))))),
+                           (iPTR 0))),
+          (!cast<Instruction>(InstrStr#rmkz)
+                      (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+                      addr:$srcAddr)>;
+
+def : Pat<(_.info128.VT (extract_subvector
+                (_.info512.VT (masked_load addr:$srcAddr, Mask,
+                      (_.info512.VT (insert_subvector undef,
+                            (_.info256.VT (insert_subvector undef,
+                                  (_.info128.VT (X86vzmovl _.info128.RC:$src)),
+                                  (iPTR 0))),
+                            (iPTR 0))))),
+                (iPTR 0))),
+          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
+                      (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+                      addr:$srcAddr)>;
+
+}
+
 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
-                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
-                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
-                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
-                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
           (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
@@ -3340,7 +3383,7 @@
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;

 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
-          (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
+          (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)),
           (COPY_TO_REGCLASS VR128X:$src, FR32X))>;

 let hasSideEffects = 0 in
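The rewritten patterns never move 8- or 16-bit GPRs into k-registers directly: a GR8/GR16 mask is first widened with INSERT_SUBREG into an IMPLICIT_DEF GR32, and mask-to-GPR bitconverts come back through GR32 plus EXTRACT_SUBREG. A small intrinsics-level C++ example that should exercise the rewritten int_x86_avx512_mask_store_ss pattern — illustrative only; the build flags are an assumption:

    #include <immintrin.h>

    // Built with something like: clang -O2 -mavx512f store_ss.c
    // The 8-bit mask arrives in a GPR and is widened to 32 bits before
    // the kmov into %k1, matching the INSERT_SUBREG in the pattern above.
    void store_low_lane(float *dst, __m128 v, __mmask8 m) {
      _mm_mask_store_ss(dst, m, v);
    }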
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -6309,8 +6309,6 @@

   // SrcReg(MaskReg) -> DestReg(GR64)
   // SrcReg(MaskReg) -> DestReg(GR32)
-  // SrcReg(MaskReg) -> DestReg(GR16)
-  // SrcReg(MaskReg) -> DestReg(GR8)

   // All KMASK RegClasses hold the same k registers, can be tested against anyone.
   if (X86::VK16RegClass.contains(SrcReg)) {
@@ -6320,21 +6318,10 @@
     }
     if (X86::GR32RegClass.contains(DestReg))
       return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
-    if (X86::GR16RegClass.contains(DestReg)) {
-      DestReg = getX86SubSuperRegister(DestReg, 32);
-      return X86::KMOVWrk;
-    }
-    if (X86::GR8RegClass.contains(DestReg)) {
-      assert(!isHReg(DestReg) && "Cannot move between mask and h-reg");
-      DestReg = getX86SubSuperRegister(DestReg, 32);
-      return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
-    }
   }

   // SrcReg(GR64) -> DestReg(MaskReg)
   // SrcReg(GR32) -> DestReg(MaskReg)
-  // SrcReg(GR16) -> DestReg(MaskReg)
-  // SrcReg(GR8) -> DestReg(MaskReg)

   // All KMASK RegClasses hold the same k registers, can be tested against anyone.
   if (X86::VK16RegClass.contains(DestReg)) {
@@ -6344,15 +6331,6 @@
     }
     if (X86::GR32RegClass.contains(SrcReg))
      return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
-    if (X86::GR16RegClass.contains(SrcReg)) {
-      SrcReg = getX86SubSuperRegister(SrcReg, 32);
-      return X86::KMOVWkr;
-    }
-    if (X86::GR8RegClass.contains(SrcReg)) {
-      assert(!isHReg(SrcReg) && "Cannot move between mask and h-reg");
-      SrcReg = getX86SubSuperRegister(SrcReg, 32);
-      return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
-    }
   }
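After this change, copyPhysReg only forms mask<->GPR copies at 32- and 64-bit width; 8- and 16-bit values must be placed in a 32-bit register first by the callers patched earlier. A hedged paraphrase of the opcode choice that remains (simplified sketch, not the literal function body):

    // Condensed view of the selection logic left in place above:
    // mask <-> GPR copies now only exist for GR32 (and GR64 under BWI),
    // preferring the 32-bit KMOVD when BWI is available, else KMOVW.
    static unsigned maskToGR32Opcode(bool HasBWI) {
      return HasBWI ? X86::KMOVDrk : X86::KMOVWrk;
    }
    static unsigned gr32ToMaskOpcode(bool HasBWI) {
      return HasBWI ? X86::KMOVDkr : X86::KMOVWkr;
    }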
Index: llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
@@ -298,7 +298,7 @@
 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
 ; SKX-NEXT: vpmovw2m %xmm0, %k0
 ; SKX-NEXT: movb $85, %al
-; SKX-NEXT: kmovb %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: kandb %k1, %k0, %k0
 ; SKX-NEXT: vpmovm2w %k0, %xmm0
 ; SKX-NEXT: popq %rax
Index: llvm/trunk/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
@@ -19,6 +19,7 @@
 ; CHECK-NEXT: korw %k3, %k2, %k1
 ; CHECK-NEXT: korw %k1, %k0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
 entry:
   %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i16 -1, i32 4)
Index: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
@@ -547,34 +547,70 @@
   ret <8 x double> %b
 }

 define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
-; NODQ-LABEL: sitof64_mask:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
-; NODQ-NEXT: retq
+; KNL-LABEL: sitof64_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; KNL-NEXT: retq
 ;
-; DQ-LABEL: sitof64_mask:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
-; DQ-NEXT: retq
+; VLBW-LABEL: sitof64_mask:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: sitof64_mask:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: sitof64_mask:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: sitof64_mask:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
   %1 = bitcast i8 %c to <8 x i1>
   %2 = sitofp <8 x i32> %b to <8 x double>
   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
   ret <8 x double> %3
 }

 define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
-; NODQ-LABEL: sitof64_maskz:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
-; NODQ-NEXT: retq
+; KNL-LABEL: sitof64_maskz:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
 ;
-; DQ-LABEL: sitof64_maskz:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
-; DQ-NEXT: retq
+; VLBW-LABEL: sitof64_maskz:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: sitof64_maskz:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: sitof64_maskz:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: sitof64_maskz:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
   %1 = bitcast i8 %b to <8 x i1>
   %2 = sitofp <8 x i32> %a to <8 x double>
   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
@@ -859,34 +895,70 @@
   ret <16 x double> %b
 }

 define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
-; NODQ-LABEL: uitof64_mask:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
-; NODQ-NEXT: retq
+; KNL-LABEL: uitof64_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; KNL-NEXT: retq
 ;
-; DQ-LABEL: uitof64_mask:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
-; DQ-NEXT: retq
+; VLBW-LABEL: uitof64_mask:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: uitof64_mask:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: uitof64_mask:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: uitof64_mask:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
   %1 = bitcast i8 %c to <8 x i1>
   %2 = uitofp <8 x i32> %b to <8 x double>
   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
   ret <8 x double> %3
 }

 define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
-; NODQ-LABEL: uitof64_maskz:
-; NODQ: ## BB#0:
-; NODQ-NEXT: kmovw %edi, %k1
-; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
-; NODQ-NEXT: retq
+; KNL-LABEL: uitof64_maskz:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
 ;
-; DQ-LABEL: uitof64_maskz:
-; DQ: ## BB#0:
-; DQ-NEXT: kmovb %edi, %k1
-; DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
-; DQ-NEXT: retq
+; VLBW-LABEL: uitof64_maskz:
+; VLBW: ## BB#0:
+; VLBW-NEXT: kmovd %edi, %k1
+; VLBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; VLBW-NEXT: retq
+;
+; VLNOBW-LABEL: uitof64_maskz:
+; VLNOBW: ## BB#0:
+; VLNOBW-NEXT: kmovw %edi, %k1
+; VLNOBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; VLNOBW-NEXT: retq
+;
+; AVX512DQ-LABEL: uitof64_maskz:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k1
+; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: uitof64_maskz:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
   %1 = bitcast i8 %b to <8 x i1>
   %2 = uitofp <8 x i32> %a to <8 x double>
   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
Index: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll
@@ -1288,11 +1288,17 @@
 }

 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
-; ALL-LABEL: zext_16i1_to_16xi32:
-; ALL: ## BB#0:
-; ALL-NEXT: kmovw %edi, %k1
-; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; ALL-NEXT: retq
+; KNL-LABEL: zext_16i1_to_16xi32:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16i1_to_16xi32:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
   %a = bitcast i16 %b to <16 x i1>
   %c = zext <16 x i1> %a to <16 x i32>
   ret <16 x i32> %c
@@ -1307,7 +1313,7 @@
 ;
 ; SKX-LABEL: zext_8i1_to_8xi64:
 ; SKX: ## BB#0:
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
 ; SKX-NEXT: retq
   %a = bitcast i8 %b to <8 x i1>
@@ -1322,13 +1328,15 @@
 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: trunc_16i8_to_16i1:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
 ; SKX-NEXT: vpmovb2m %xmm0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; SKX-NEXT: retq
   %mask_b = trunc <16 x i8>%a to <16 x i1>
   %mask = bitcast <16 x i1> %mask_b to i16
@@ -1341,13 +1349,15 @@
 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: trunc_16i32_to_16i1:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0
 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
   %mask_b = trunc <16 x i32>%a to <16 x i1>
@@ -1386,13 +1396,15 @@
 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: trunc_8i16_to_8i1:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
 ; SKX-NEXT: vpmovw2m %xmm0, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; SKX-NEXT: retq
   %mask_b = trunc <8 x i16>%a to <8 x i1>
   %mask = bitcast <8 x i1> %mask_b to i8
@@ -1420,17 +1432,31 @@
 }

 define i16 @trunc_i32_to_i1(i32 %a) {
-; ALL-LABEL: trunc_i32_to_i1:
-; ALL: ## BB#0:
-; ALL-NEXT: andl $1, %edi
-; ALL-NEXT: kmovw %edi, %k0
-; ALL-NEXT: movw $-4, %ax
-; ALL-NEXT: kmovw %eax, %k1
-; ALL-NEXT: kshiftrw $1, %k1, %k1
-; ALL-NEXT: kshiftlw $1, %k1, %k1
-; ALL-NEXT: korw %k0, %k1, %k0
-; ALL-NEXT: kmovw %k0, %eax
-; ALL-NEXT: retq
+; KNL-LABEL: trunc_i32_to_i1:
+; KNL: ## BB#0:
+; KNL-NEXT: andl $1, %edi
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: movw $-4, %ax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k1
+; KNL-NEXT: kshiftlw $1, %k1, %k1
+; KNL-NEXT: korw %k0, %k1, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_i32_to_i1:
+; SKX: ## BB#0:
+; SKX-NEXT: andl $1, %edi
+; SKX-NEXT: kmovw %edi, %k0
+; SKX-NEXT: movw $-4, %ax
+; SKX-NEXT: kmovd %eax, %k1
+; SKX-NEXT: kshiftrw $1, %k1, %k1
+; SKX-NEXT: kshiftlw $1, %k1, %k1
+; SKX-NEXT: korw %k0, %k1, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; SKX-NEXT: retq
   %a_i = trunc i32 %a to i1
   %maskv = insertelement <16 x i1> , i1 %a_i, i32 0
   %res = bitcast <16 x i1> %maskv to i16
Index: llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll
@@ -675,7 +675,7 @@
 define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
 ; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf64x4 $1, %zmm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 entry:
@@ -689,7 +689,7 @@
 define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
 ; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
 ; SKX-NEXT: retq
 entry:
@@ -703,7 +703,7 @@
 define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
 ; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf32x4 $1, %zmm1, %xmm0 {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -719,7 +719,7 @@
 define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
 ; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -735,7 +735,7 @@
 define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
 ; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf64x2 $1, %ymm1, %xmm0 {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -750,7 +750,7 @@
 define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
 ; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -765,7 +765,7 @@
 define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
 ; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm0 {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -780,7 +780,7 @@
 define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
 ; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -795,7 +795,7 @@
 define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
 ; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf32x4 $1, %ymm1, %xmm0 {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -810,7 +810,7 @@
 define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
 ; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -825,7 +825,7 @@
 define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
 ; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextracti32x4 $1, %ymm1, %xmm0 {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -843,7 +843,7 @@
 define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
 ; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -860,7 +860,7 @@
 define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
 ; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 entry:
@@ -873,7 +873,7 @@
 define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
 ; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
 ; SKX-NEXT: retq
 entry:
@@ -886,7 +886,7 @@
 define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
 ; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -901,7 +901,7 @@
 define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
 ; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
 ; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
Index: llvm/trunk/test/CodeGen/X86/avx512-fsel.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-fsel.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-fsel.ll
@@ -10,25 +10,24 @@
 ; CHECK-NEXT: pushq %rax
 ; CHECK-NEXT: Lcfi0:
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movb $1, %al
 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
-; CHECK-NEXT: setp %cl
-; CHECK-NEXT: setne %dl
-; CHECK-NEXT: setnp %sil
-; CHECK-NEXT: sete %dil
-; CHECK-NEXT: andb %sil, %dil
-; CHECK-NEXT: ## implicit-def: %R8D
-; CHECK-NEXT: movb %dil, %r8b
-; CHECK-NEXT: andl $1, %r8d
-; CHECK-NEXT: kmovw %r8d, %k0
-; CHECK-NEXT: orb %cl, %dl
-; CHECK-NEXT: ## implicit-def: %R8D
-; CHECK-NEXT: movb %dl, %r8b
-; CHECK-NEXT: andl $1, %r8d
-; CHECK-NEXT: kmovw %r8d, %k1
-; CHECK-NEXT: kmovw %k1, %ecx
-; CHECK-NEXT: testb $1, %cl
-; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) ## 1-byte Spill
+; CHECK-NEXT: setp %al
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: setnp %dl
+; CHECK-NEXT: sete %sil
+; CHECK-NEXT: andb %dl, %sil
+; CHECK-NEXT: ## implicit-def: %EDI
+; CHECK-NEXT: movb %sil, %dil
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: ## implicit-def: %EDI
+; CHECK-NEXT: movb %cl, %dil
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovw %k1, %edi
+; CHECK-NEXT: movb %dil, %al
+; CHECK-NEXT: testb $1, %al
 ; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
 ; CHECK-NEXT: jne LBB0_1
 ; CHECK-NEXT: jmp LBB0_2
Index: llvm/trunk/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -14,7 +14,7 @@
 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_dps:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
@@ -30,7 +30,7 @@
 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_dpd:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
@@ -46,7 +46,7 @@
 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_qps:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
@@ -62,7 +62,7 @@
 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_qpd:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
@@ -90,7 +90,7 @@
 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_dd:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
@@ -106,7 +106,7 @@
 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_qd:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
@@ -122,7 +122,7 @@
 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_qq:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
@@ -138,7 +138,7 @@
 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_mask_dq:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
@@ -154,7 +154,7 @@
 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
 ; CHECK-LABEL: gather_mask_dpd_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
 ; CHECK-NEXT: vzeroupper
@@ -167,7 +167,7 @@
 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
 ; CHECK-LABEL: gather_mask_qpd_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
 ; CHECK-NEXT: vzeroupper
@@ -180,7 +180,7 @@
 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
 ; CHECK-LABEL: gather_mask_dps_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
 ; CHECK-NEXT: retq
@@ -191,7 +191,7 @@
 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
 ; CHECK-LABEL: gather_mask_qps_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
 ; CHECK-NEXT: retq
@@ -202,7 +202,7 @@
 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: scatter_mask_dpd_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
 ; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
 ; CHECK-NEXT: vzeroupper
@@ -215,7 +215,7 @@
 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: scatter_mask_qpd_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
 ; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
 ; CHECK-NEXT: vzeroupper
@@ -228,7 +228,7 @@
 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: scatter_mask_dps_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
 ; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
 ; CHECK-NEXT: vzeroupper
@@ -241,7 +241,7 @@
 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: scatter_mask_qps_execdomain:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
 ; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
 ; CHECK-NEXT: vzeroupper
@@ -278,10 +278,10 @@
 ; CHECK-NEXT: kxorw %k0, %k0, %k1
 ; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
 ; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: kmovd %eax, %k1
 ; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
 ; CHECK-NEXT: movb $120, %al
-; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: kmovd %eax, %k1
 ; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
@@ -297,7 +297,7 @@
 define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
@@ -315,7 +315,7 @@
 define <2 x i64>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
 ; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT: retq
@@ -330,7 +330,7 @@
 define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
@@ -348,7 +348,7 @@
 define <4 x i64>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
@@ -366,7 +366,7 @@
 define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
@@ -384,7 +384,7 @@
 define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
@@ -402,7 +402,7 @@
 define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
@@ -421,7 +421,7 @@
 define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vmovdqa %xmm0, %xmm2
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
@@ -440,7 +440,7 @@
 define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
@@ -458,7 +458,7 @@
 define <2 x i64>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
 ; CHECK-NEXT: vpaddq %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT: retq
@@ -473,7 +473,7 @@
 define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorpd %ymm2, %ymm2, %ymm2
@@ -491,7 +491,7 @@
 define <4 x i64>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
 ; CHECK-NEXT: vpaddq %ymm0, %ymm0, %ymm0
 ; CHECK-NEXT: retq
@@ -506,7 +506,7 @@
 define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
@@ -524,7 +524,7 @@
 define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
@@ -542,7 +542,7 @@
 define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
@@ -560,7 +560,7 @@
 define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vmovdqa %ymm0, %ymm2
 ; CHECK-NEXT: kmovq %k1, %k2
 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
@@ -578,7 +578,7 @@
 define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
 ; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -593,7 +593,7 @@
 define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -608,7 +608,7 @@
 define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
@@ -624,7 +624,7 @@
 define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
@@ -640,7 +640,7 @@
 define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -655,7 +655,7 @@
 define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -670,7 +670,7 @@
 define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
@@ -686,7 +686,7 @@
 define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
@@ -702,7 +702,7 @@
 define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
 ; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -717,7 +717,7 @@
 define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
 ; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -732,7 +732,7 @@
 define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
@@ -748,7 +748,7 @@
 define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
 ; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
@@ -764,7 +764,7 @@
 define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -779,7 +779,7 @@
 define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
@@ -794,7 +794,7 @@
 define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
@@ -810,7 +810,7 @@
 define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
@@ -829,10 +829,10 @@
 ; CHECK-NEXT: kxorw %k0, %k0, %k1
 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: kmovd %eax, %k1
 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
 ; CHECK-NEXT: movb $96, %al
-; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: kmovd %eax, %k1
 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
@@ -853,11 +853,11 @@
 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
 ; CHECK-NEXT: movw $1, %ax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovd %eax, %k1
 ; CHECK-NEXT: vmovaps %zmm1, %zmm4
 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm4 {%k1}
 ; CHECK-NEXT: movw $220, %ax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovd %eax, %k1
 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
 ; CHECK-NEXT: vaddps %zmm4, %zmm1, %zmm1
Index: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
@@ -343,6 +343,7 @@
 ; KNL-NEXT: kshiftlw $1, %k1, %k1
 ; KNL-NEXT: korw %k0, %k1, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: test13:
@@ -352,11 +353,12 @@
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: kmovw %eax, %k0
 ; SKX-NEXT: movw $-4, %ax
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
 ; SKX-NEXT: kshiftrw $1, %k1, %k1
 ; SKX-NEXT: kshiftlw $1, %k1, %k1
 ; SKX-NEXT: korw %k0, %k1, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; SKX-NEXT: retq
   %cmp_res = icmp ult i32 %a, %b
   %maskv = insertelement <16 x i1> , i1 %cmp_res, i32 0
@@ -433,6 +435,7 @@
 ; KNL-NEXT: vpslld $31, %zmm2, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: test16:
@@ -440,13 +443,14 @@
 ; SKX-NEXT: movzbl (%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: kmovw %esi, %k1
+; SKX-NEXT: kmovd %esi, %k1
 ; SKX-NEXT: vpmovm2d %k1, %zmm0
 ; SKX-NEXT: vpmovm2d %k0, %zmm1
 ; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
 ; SKX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
 ; SKX-NEXT: vpmovd2m %zmm2, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
   %x = load i1 , i1 * %addr, align 128
@@ -470,6 +474,7 @@
 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: test17:
@@ -477,13 +482,14 @@
 ; SKX-NEXT: movzbl (%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kmovd %esi, %k1
 ; SKX-NEXT: vpmovm2q %k1, %zmm0
 ; SKX-NEXT: vpmovm2q %k0, %zmm1
 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
 ; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
 ; SKX-NEXT: vpmovq2m %zmm2, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
   %x = load i1 , i1 * %addr, align 128
@@ -1336,6 +1342,7 @@
 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: test_iinsertelement_v4i1:
@@ -1350,7 +1357,8 @@
 ; SKX-NEXT: vpbroadcastq %xmm1, %xmm1
 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
 ; SKX-NEXT: vpmovd2m %xmm0, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; SKX-NEXT: retq
   %cmp_res_i1 = icmp ult i32 %a, %b
   %cmp_cmp_vec = icmp ult <4 x i32> %x, %y
@@ -1381,6 +1389,7 @@
 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: test_iinsertelement_v2i1:
@@ -1394,7 +1403,8 @@
 ; SKX-NEXT: kshiftrw $1, %k1, %k1
 ; SKX-NEXT: kshiftlw $1, %k0, %k0
 ; SKX-NEXT: korw %k0, %k1, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; SKX-NEXT: retq
   %cmp_res_i1 = icmp ult i32 %a, %b
   %cmp_cmp_vec = icmp ult <2 x i64> %x, %y
Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -504,6 +504,7 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
   ret i16 %res
@@ -515,6 +516,7 @@
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
   ret i16 %res
@@ -527,6 +529,7 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
   ret i8 %res
@@ -538,6 +541,7 @@
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
   ret i8 %res
@@ -550,6 +554,7 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
   ret i16 %res
@@ -561,6 +566,7 @@
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
   ret i16 %res
@@ -573,6 +579,7 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
   ret i8 %res
@@ -584,6 +591,7 @@
 ; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
   ret i8 %res
Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
@@ -40,6 +40,7 @@
 ; CHECK-NEXT: kandw %k0, %k1, %k0
 ; CHECK-NEXT: kandw %k0, %k2, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
@@ -57,6 +58,7 @@
 ; CHECK-NEXT: kandnw %k2, %k1, %k1
 ; CHECK-NEXT: kandnw %k0, %k1, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
@@ -70,6 +72,7 @@
 ; CHECK-NEXT: kmovw %edi, %k0
 ; CHECK-NEXT: knotw %k0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
   ret i16 %res
@@ -86,6 +89,7 @@
 ; CHECK-NEXT: korw %k0, %k1, %k0
 ; CHECK-NEXT: korw %k0, %k2, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
@@ -101,6 +105,7 @@
 ; CHECK-NEXT: kmovw %esi, %k1
 ; CHECK-NEXT: kunpckbw %k1, %k0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
   ret i16 %res
@@ -117,6 +122,7 @@
 ; CHECK-NEXT: kxorw %k0, %k1, %k0
 ; CHECK-NEXT: kxorw %k0, %k2, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
@@ -134,6 +140,7 @@
 ; CHECK-NEXT: kxorw %k0, %k1, %k0
 ; CHECK-NEXT: kxorw %k0, %k2, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
@@ -714,6 +721,7 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
   ret i16 %res
@@ -725,6 +733,7 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
   ret i8 %res
@@ -792,11 +801,12 @@
 ; CHECK-LABEL: test_vptestmq:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1}
 ; CHECK-NEXT: kmovw %k0, %eax
 ; CHECK-NEXT: addb %cl, %al
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; CHECK-NEXT: retq
   %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
   %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
@@ -809,9 +819,9 @@
 ; CHECK-LABEL: test_vptestmd:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1}
 ; CHECK-NEXT: kmovw %k0, %eax
 ; CHECK-NEXT: addl %ecx, %eax
 ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
@@ -838,29 +848,29 @@
 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: test_cmp_d_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k3
-; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k4
-; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k5
-; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k6
-; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k7
-; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k2
-; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k4, %eax
-; CHECK-NEXT: kmovw %k3, %ecx
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k1
+; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k2
+; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k3
+; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k4
+; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k5
+; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k6
+; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k7
+; CHECK-NEXT: kmovw %k1, %eax
+; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vmovd %ecx, %xmm0
 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k5, %eax
+; CHECK-NEXT: kmovw %k2, %eax
 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k6, %eax
+; CHECK-NEXT: kmovw %k3, %eax
 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k7, %eax
+; CHECK-NEXT: kmovw %k4, %eax
 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k2, %eax
+; CHECK-NEXT: kmovw %k5, %eax
 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k1, %eax
+; CHECK-NEXT: kmovw %k6, %eax
 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: kmovw %k7, %eax
 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
@@ -885,30 +895,30 @@
 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
 ; CHECK-LABEL: test_mask_cmp_d_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k3
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k4 {%k3}
-; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k5 {%k3}
-; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k6 {%k3}
-; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k7 {%k3}
-; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k3}
-; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k2 {%k3}
-; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k1 {%k3}
-; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k3 {%k3}
-; CHECK-NEXT: kmovw %k5, %eax
-; CHECK-NEXT: kmovw %k4, %ecx
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k2 {%k1}
+; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1}
+; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k5 {%k1}
+; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k7 {%k1}
+; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k2, %eax
+; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vmovd %ecx, %xmm0
 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k6, %eax
+; CHECK-NEXT: kmovw %k3, %eax
 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k7, %eax
+; CHECK-NEXT: kmovw %k4, %eax
 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: kmovw %k5, %eax
 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k2, %eax
+; CHECK-NEXT: kmovw %k6, %eax
 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k1, %eax
+; CHECK-NEXT: kmovw %k7, %eax
 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k3, %eax
+; CHECK-NEXT: kmovw %k1, %eax
 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
@@ -935,29 +945,29 @@
 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: test_ucmp_d_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k3
-; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k4
-; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k5
-; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k6
-; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k7
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k2
-; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k4, %eax
-; CHECK-NEXT: kmovw %k3, %ecx
+; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k1
+; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k2
+; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k3
+; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k4
+; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k5
+; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k6
+; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k7
+; CHECK-NEXT: kmovw %k1, %eax
+; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vmovd %ecx, %xmm0
 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k5, %eax
+; CHECK-NEXT: kmovw %k2, %eax
 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k6, %eax
+; CHECK-NEXT: kmovw %k3, %eax
 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k7, %eax
+; CHECK-NEXT: kmovw %k4, %eax
 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k2, %eax
+; CHECK-NEXT: kmovw %k5, %eax
 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k1, %eax
+; CHECK-NEXT: kmovw %k6, %eax
 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: kmovw %k7, %eax
 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
@@ -982,30 +992,30 @@
 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
 ; CHECK-LABEL: test_mask_ucmp_d_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k3
-; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k4 {%k3}
-; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k5 {%k3}
-; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k6 {%k3}
-; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k7 {%k3}
-; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k3}
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k2 {%k3}
-; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k3}
-; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k3 {%k3}
-; CHECK-NEXT: kmovw %k5, %eax
-; CHECK-NEXT: kmovw %k4, %ecx
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1}
+; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1}
+; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k5 {%k1}
+; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k7 {%k1}
+; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k2, %eax
+; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vmovd %ecx, %xmm0
 ;
CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k6, %eax +; CHECK-NEXT: kmovw %k3, %eax ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k7, %eax +; CHECK-NEXT: kmovw %k4, %eax ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k2, %eax +; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k1, %eax +; CHECK-NEXT: kmovw %k7, %eax ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k3, %eax +; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) @@ -1032,29 +1042,29 @@ define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK-LABEL: test_cmp_q_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k3 -; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k4 -; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k5 -; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k6 -; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k7 -; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2 -; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k4, %eax -; CHECK-NEXT: kmovw %k3, %ecx +; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k2 +; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k3 +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 +; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k5 +; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k6 +; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k7 +; CHECK-NEXT: kmovw %k1, %eax +; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k5, %eax +; CHECK-NEXT: kmovw %k2, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k6, %eax +; CHECK-NEXT: kmovw %k3, %eax ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k7, %eax +; CHECK-NEXT: kmovw %k4, %eax ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k2, %eax +; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k1, %eax +; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: kmovw %k7, %eax ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) @@ -1079,30 +1089,30 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_q_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k4 {%k3} -; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k5 {%k3} -; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k6 {%k3} -; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k7 {%k3} -; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k3} -; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2 {%k3} -; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1 {%k3} -; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3} -; CHECK-NEXT: kmovw %k5, %eax -; CHECK-NEXT: kmovw %k4, %ecx +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k2 {%k1} +; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1} +; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k4 {%k1} +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k5 {%k1} +; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k6 {%k1} +; 
CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k7 {%k1} +; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k1 {%k1} +; CHECK-NEXT: kmovw %k2, %eax +; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k6, %eax +; CHECK-NEXT: kmovw %k3, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k7, %eax +; CHECK-NEXT: kmovw %k4, %eax ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k2, %eax +; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k1, %eax +; CHECK-NEXT: kmovw %k7, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k3, %eax +; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) @@ -1129,29 +1139,29 @@ define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK-LABEL: test_ucmp_q_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k3 -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k4 -; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k5 -; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k6 -; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k7 -; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2 -; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k4, %eax -; CHECK-NEXT: kmovw %k3, %ecx +; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 +; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k2 +; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k3 +; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k4 +; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5 +; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k6 +; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k7 +; CHECK-NEXT: kmovw %k1, %eax +; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k5, %eax +; CHECK-NEXT: kmovw %k2, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k6, %eax +; CHECK-NEXT: kmovw %k3, %eax ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k7, %eax +; CHECK-NEXT: kmovw %k4, %eax ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k2, %eax +; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k1, %eax +; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: kmovw %k7, %eax ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) @@ -1176,30 +1186,30 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_q_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 -; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k4 {%k3} -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k5 {%k3} -; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k6 {%k3} -; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k7 {%k3} -; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k3} -; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2 {%k3} -; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k3} -; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3} -; CHECK-NEXT: kmovw %k5, %eax -; CHECK-NEXT: kmovw %k4, %ecx +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vpcmpltuq %zmm1, 
%zmm0, %k2 {%k1} +; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1} +; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k4 {%k1} +; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k5 {%k1} +; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k6 {%k1} +; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k7 {%k1} +; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k1 {%k1} +; CHECK-NEXT: kmovw %k2, %eax +; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vmovd %ecx, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k6, %eax +; CHECK-NEXT: kmovw %k3, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k7, %eax +; CHECK-NEXT: kmovw %k4, %eax ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k2, %eax +; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k1, %eax +; CHECK-NEXT: kmovw %k7, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k3, %eax +; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) @@ -2770,9 +2780,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm1, %zmm3 -; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1} -; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 +; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 +; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vaddpd %zmm3, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) @@ -2787,9 +2797,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 -; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1} -; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 +; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 +; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) @@ -2804,9 +2814,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3 -; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1} -; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 +; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 +; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -2859,9 +2869,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 -; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z} -; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 +; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 +; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} +; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0 ; CHECK-NEXT: retq 
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) @@ -2877,9 +2887,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3 -; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z} -; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 +; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 +; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} +; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -2894,9 +2904,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3 -; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1} -; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0 +; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 +; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vpaddd %zmm3, %zmm1, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) @@ -2940,8 +2950,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovqb %zmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 @@ -2974,8 +2984,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 @@ -3008,8 +3018,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 @@ -3042,8 +3052,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovqw %zmm0, %xmm0 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 @@ -3076,8 +3086,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 @@ -3110,8 +3120,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 
{%k1} ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 @@ -3144,8 +3154,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovqd %zmm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 @@ -3178,8 +3188,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 @@ -3212,8 +3222,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 @@ -3246,8 +3256,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovdb %zmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 @@ -3280,8 +3290,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 @@ -3314,8 +3324,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 @@ -3348,8 +3358,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovdw %zmm0, %ymm0 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 @@ -3382,8 +3392,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 @@ -3416,8 +3426,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: 
vpaddw %ymm2, %ymm0, %ymm0 @@ -3804,8 +3814,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1] ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1] ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1] ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 @@ -4082,9 +4092,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 -; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} -; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) @@ -4099,9 +4109,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 -; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z} -; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) @@ -4116,9 +4126,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 -; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} -; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) @@ -4133,9 +4143,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3 -; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z} -; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 -; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) @@ -4382,11 +4392,11 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 {%k1} {z} -; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 -; 
CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3) @@ -4402,11 +4412,11 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 {%k1} {z} -; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) @@ -4462,11 +4472,11 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1} -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3 {%k1} {z} -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) @@ -4482,11 +4492,11 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} -; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3 {%k1} {z} -; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) @@ -4502,11 +4512,11 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3 {%k1} {z} -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) %res1 = call <16 x 
float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) @@ -4522,11 +4532,11 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} -; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3 {%k1} {z} -; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3) @@ -4659,13 +4669,13 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 -; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 ; CHECK-NEXT: vmovaps %zmm0, %zmm4 -; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4 +; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4 {%k1} {z} ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 -; CHECK-NEXT: vaddps %zmm4, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm4, %zmm0 +; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8) @@ -4729,9 +4739,9 @@ ; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ## kill: %AX %AX %EAX @@ -4748,11 +4758,12 @@ ; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: addb %cl, %al +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1) @@ -4764,8 +4775,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} ; CHECK-NEXT: vpbroadcastd %edi, %zmm1 {%k1} {z} +; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1} ; CHECK-NEXT: vpbroadcastd %edi, %zmm2 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 @@ -4784,8 +4795,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} ; CHECK-NEXT: vpbroadcastq %rdi, %zmm1 {%k1} {z} +; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} 
; CHECK-NEXT: vpbroadcastq %rdi, %zmm2 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 Index: llvm/trunk/test/CodeGen/X86/avx512-logic.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-logic.ll +++ llvm/trunk/test/CodeGen/X86/avx512-logic.ll @@ -299,7 +299,7 @@ ; ; SKX-LABEL: masked_and_v16f32: ; SKX: ## BB#0: -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 ; SKX-NEXT: retq @@ -324,7 +324,7 @@ ; ; SKX-LABEL: masked_or_v16f32: ; SKX: ## BB#0: -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 ; SKX-NEXT: retq @@ -349,7 +349,7 @@ ; ; SKX-LABEL: masked_xor_v16f32: ; SKX: ## BB#0: -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 ; SKX-NEXT: retq @@ -374,7 +374,7 @@ ; ; SKX-LABEL: masked_and_v8f64: ; SKX: ## BB#0: -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 ; SKX-NEXT: retq @@ -399,7 +399,7 @@ ; ; SKX-LABEL: masked_or_v8f64: ; SKX: ## BB#0: -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 ; SKX-NEXT: retq @@ -424,7 +424,7 @@ ; ; SKX-LABEL: masked_xor_v8f64: ; SKX: ## BB#0: -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 ; SKX-NEXT: retq @@ -448,7 +448,7 @@ ; ; SKX-LABEL: test_mm512_mask_and_epi32: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -470,7 +470,7 @@ ; ; SKX-LABEL: test_mm512_mask_or_epi32: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -492,7 +492,7 @@ ; ; SKX-LABEL: test_mm512_mask_xor_epi32: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -514,7 +514,7 @@ ; ; SKX-LABEL: test_mm512_mask_xor_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -536,7 +536,7 @@ ; ; SKX-LABEL: test_mm512_maskz_xor_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -558,7 +558,7 @@ ; ; SKX-LABEL: test_mm512_mask_xor_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -580,7 +580,7 @@ ; ; SKX-LABEL: test_mm512_maskz_xor_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -602,7 +602,7 @@ ; ; SKX-LABEL: test_mm512_mask_or_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -624,7 +624,7 @@ ; ; SKX-LABEL: test_mm512_maskz_or_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, 
%k1 ; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -646,7 +646,7 @@ ; ; SKX-LABEL: test_mm512_mask_or_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -668,7 +668,7 @@ ; ; SKX-LABEL: test_mm512_maskz_or_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -690,7 +690,7 @@ ; ; SKX-LABEL: test_mm512_mask_and_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -712,7 +712,7 @@ ; ; SKX-LABEL: test_mm512_maskz_and_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -734,7 +734,7 @@ ; ; SKX-LABEL: test_mm512_mask_and_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -756,7 +756,7 @@ ; ; SKX-LABEL: test_mm512_maskz_and_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -778,7 +778,7 @@ ; ; SKX-LABEL: test_mm512_mask_andnot_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -801,7 +801,7 @@ ; ; SKX-LABEL: test_mm512_maskz_andnot_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -824,7 +824,7 @@ ; ; SKX-LABEL: test_mm512_mask_andnot_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq entry: @@ -847,7 +847,7 @@ ; ; SKX-LABEL: test_mm512_maskz_andnot_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} ; SKX-NEXT: retq entry: Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll @@ -6,12 +6,37 @@ define i16 @mask16(i16 %x) { -; CHECK-LABEL: mask16: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq +; KNL-LABEL: mask16: +; KNL: ## BB#0: +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: knotw %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: ## kill: %AX %AX %EAX +; KNL-NEXT: retq +; +; SKX-LABEL: mask16: +; SKX: ## BB#0: +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: knotw %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: ## kill: %AX %AX %EAX +; SKX-NEXT: retq +; +; AVX512BW-LABEL: mask16: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: ## kill: %AX %AX %EAX +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: mask16: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: kmovw %edi, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: ## kill: %AX %AX %EAX +; AVX512DQ-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, %ret = bitcast <16 x i1> %m1 to i16 @@ -19,12 
+44,33 @@ } define i32 @mask16_zext(i16 %x) { -; CHECK-LABEL: mask16_zext: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq +; KNL-LABEL: mask16_zext: +; KNL: ## BB#0: +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: knotw %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: retq +; +; SKX-LABEL: mask16_zext: +; SKX: ## BB#0: +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: knotw %k0, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: mask16_zext: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 +; AVX512BW-NEXT: kmovw %k0, %eax +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: mask16_zext: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: kmovw %edi, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, %m2 = bitcast <16 x i1> %m1 to i16 @@ -38,27 +84,31 @@ ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: ## kill: %AL %AL %EAX ; KNL-NEXT: retq ; ; SKX-LABEL: mask8: ; SKX: ## BB#0: -; SKX-NEXT: kmovb %edi, %k0 +; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: knotb %k0, %k0 -; SKX-NEXT: kmovb %k0, %eax +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: ## kill: %AL %AL %EAX ; SKX-NEXT: retq ; ; AVX512BW-LABEL: mask8: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovw %edi, %k0 +; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: knotw %k0, %k0 -; AVX512BW-NEXT: kmovw %k0, %eax +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: ## kill: %AL %AL %EAX ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: mask8: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: kmovb %edi, %k0 +; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: knotb %k0, %k0 -; AVX512DQ-NEXT: kmovb %k0, %eax +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: ## kill: %AL %AL %EAX ; AVX512DQ-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, @@ -77,22 +127,22 @@ ; ; SKX-LABEL: mask8_zext: ; SKX: ## BB#0: -; SKX-NEXT: kmovb %edi, %k0 +; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: knotb %k0, %k0 ; SKX-NEXT: kmovb %k0, %eax ; SKX-NEXT: retq ; ; AVX512BW-LABEL: mask8_zext: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovw %edi, %k0 +; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: knotw %k0, %k0 -; AVX512BW-NEXT: kmovw %k0, %eax +; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movzbl %al, %eax ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: mask8_zext: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: kmovb %edi, %k0 +; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: knotb %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, %eax ; AVX512DQ-NEXT: retq @@ -177,15 +227,49 @@ } define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { -; CHECK-LABEL: mand16_mem: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw (%rdi), %k0 -; CHECK-NEXT: kmovw (%rsi), %k1 -; CHECK-NEXT: kandw %k1, %k0, %k2 -; CHECK-NEXT: kxorw %k1, %k0, %k0 -; CHECK-NEXT: korw %k0, %k2, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq +; KNL-LABEL: mand16_mem: +; KNL: ## BB#0: +; KNL-NEXT: kmovw (%rdi), %k0 +; KNL-NEXT: kmovw (%rsi), %k1 +; KNL-NEXT: kandw %k1, %k0, %k2 +; KNL-NEXT: kxorw %k1, %k0, %k0 +; KNL-NEXT: korw %k0, %k2, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: ## kill: %AX %AX %EAX +; KNL-NEXT: retq +; +; SKX-LABEL: mand16_mem: +; SKX: ## BB#0: +; SKX-NEXT: kmovw (%rdi), %k0 +; SKX-NEXT: kmovw (%rsi), %k1 +; SKX-NEXT: kandw %k1, %k0, %k2 +; SKX-NEXT: kxorw %k1, %k0, %k0 +; SKX-NEXT: korw %k0, %k2, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: ## kill: %AX %AX %EAX +; SKX-NEXT: 
retq +; +; AVX512BW-LABEL: mand16_mem: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovw (%rdi), %k0 +; AVX512BW-NEXT: kmovw (%rsi), %k1 +; AVX512BW-NEXT: kandw %k1, %k0, %k2 +; AVX512BW-NEXT: kxorw %k1, %k0, %k0 +; AVX512BW-NEXT: korw %k0, %k2, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: ## kill: %AX %AX %EAX +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: mand16_mem: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: kmovw (%rdi), %k0 +; AVX512DQ-NEXT: kmovw (%rsi), %k1 +; AVX512DQ-NEXT: kandw %k1, %k0, %k2 +; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 +; AVX512DQ-NEXT: korw %k0, %k2, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: ## kill: %AX %AX %EAX +; AVX512DQ-NEXT: retq %ma = load <16 x i1>, <16 x i1>* %x %mb = load <16 x i1>, <16 x i1>* %y %mc = and <16 x i1> %ma, %mb @@ -201,27 +285,31 @@ ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kshiftrw $8, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: ## kill: %AL %AL %EAX ; KNL-NEXT: retq ; ; SKX-LABEL: shuf_test1: ; SKX: ## BB#0: -; SKX-NEXT: kmovw %edi, %k0 +; SKX-NEXT: kmovd %edi, %k0 ; SKX-NEXT: kshiftrw $8, %k0, %k0 -; SKX-NEXT: kmovb %k0, %eax +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: ## kill: %AL %AL %EAX ; SKX-NEXT: retq ; ; AVX512BW-LABEL: shuf_test1: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovw %edi, %k0 +; AVX512BW-NEXT: kmovd %edi, %k0 ; AVX512BW-NEXT: kshiftrw $8, %k0, %k0 -; AVX512BW-NEXT: kmovw %k0, %eax +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: ## kill: %AL %AL %EAX ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: shuf_test1: ; AVX512DQ: ## BB#0: ; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0 -; AVX512DQ-NEXT: kmovb %k0, %eax +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: ## kill: %AL %AL %EAX ; AVX512DQ-NEXT: retq %v1 = bitcast i16 %v to <16 x i1> %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> @@ -526,7 +614,7 @@ ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k0 ; SKX-NEXT: movb $85, %al -; SKX-NEXT: kmovb %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: ktestb %k0, %k0 ; SKX-NEXT: retq @@ -536,9 +624,9 @@ ; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 ; AVX512BW-NEXT: movb $85, %al -; AVX512BW-NEXT: kmovw %eax, %k1 +; AVX512BW-NEXT: kmovd %eax, %k1 ; AVX512BW-NEXT: korw %k1, %k0, %k0 -; AVX512BW-NEXT: kmovw %k0, %eax +; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -549,7 +637,7 @@ ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: movb $85, %al -; AVX512DQ-NEXT: kmovb %eax, %k1 +; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: ktestb %k0, %k0 ; AVX512DQ-NEXT: vzeroupper @@ -799,7 +887,7 @@ ; SKX-NEXT: movw $21845, %ax ## imm = 0x5555 ; SKX-NEXT: movw $1, %cx ; SKX-NEXT: cmovgw %ax, %cx -; SKX-NEXT: kmovw %ecx, %k0 +; SKX-NEXT: kmovd %ecx, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: retq ; @@ -809,7 +897,7 @@ ; AVX512BW-NEXT: movw $21845, %ax ## imm = 0x5555 ; AVX512BW-NEXT: movw $1, %cx ; AVX512BW-NEXT: cmovgw %ax, %cx -; AVX512BW-NEXT: kmovw %ecx, %k0 +; AVX512BW-NEXT: kmovd %ecx, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: %XMM0 %XMM0 %ZMM0 ; AVX512BW-NEXT: vzeroupper @@ -1099,8 +1187,8 @@ ; ; SKX-LABEL: test18: ; SKX: ## BB#0: -; SKX-NEXT: kmovb %edi, %k0 -; SKX-NEXT: kmovw %esi, %k1 +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 ; SKX-NEXT: kshiftlw $7, %k1, %k2 ; SKX-NEXT: kshiftrw $15, %k2, %k2 ; 
SKX-NEXT: kshiftlw $6, %k1, %k1 @@ -1120,8 +1208,8 @@ ; ; AVX512BW-LABEL: test18: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovw %edi, %k1 -; AVX512BW-NEXT: kmovw %esi, %k2 +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: kmovd %esi, %k2 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k0 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0 ; AVX512BW-NEXT: kshiftlw $6, %k2, %k2 @@ -1143,7 +1231,7 @@ ; ; AVX512DQ-LABEL: test18: ; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: kmovb %edi, %k0 +; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: kmovw %esi, %k1 ; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2 ; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2 @@ -1708,7 +1796,7 @@ ; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1 ; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} -; AVX512BW-NEXT: kmovw %k0, %eax +; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: je LBB41_2 ; AVX512BW-NEXT: ## BB#1: ## %L1 @@ -3564,7 +3652,7 @@ ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: kmovw %k0, %eax +; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movzbl %al, %eax ; AVX512BW-NEXT: addl %eax, %eax ; AVX512BW-NEXT: vzeroupper @@ -3629,13 +3717,41 @@ } define i16 @test_v16i1_add(i16 %x, i16 %y) { -; CHECK-LABEL: test_v16i1_add: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: kxorw %k1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq +; KNL-LABEL: test_v16i1_add: +; KNL: ## BB#0: +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kmovw %esi, %k1 +; KNL-NEXT: kxorw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: ## kill: %AX %AX %EAX +; KNL-NEXT: retq +; +; SKX-LABEL: test_v16i1_add: +; SKX: ## BB#0: +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kxorw %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: ## kill: %AX %AX %EAX +; SKX-NEXT: retq +; +; AVX512BW-LABEL: test_v16i1_add: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: kxorw %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: ## kill: %AX %AX %EAX +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: test_v16i1_add: +; AVX512DQ: ## BB#0: +; AVX512DQ-NEXT: kmovw %edi, %k0 +; AVX512DQ-NEXT: kmovw %esi, %k1 +; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: ## kill: %AX %AX %EAX +; AVX512DQ-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = bitcast i16 %y to <16 x i1> %m2 = add <16 x i1> %m0, %m1 @@ -3644,13 +3760,41 @@ } define i16 @test_v16i1_sub(i16 %x, i16 %y) { -; CHECK-LABEL: test_v16i1_sub: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: kxorw %k1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq +; KNL-LABEL: test_v16i1_sub: +; KNL: ## BB#0: +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kmovw %esi, %k1 +; KNL-NEXT: kxorw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: ## kill: %AX %AX %EAX +; KNL-NEXT: retq +; +; SKX-LABEL: test_v16i1_sub: +; SKX: ## BB#0: +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kxorw %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: ## kill: %AX %AX %EAX +; SKX-NEXT: retq +; +; AVX512BW-LABEL: test_v16i1_sub: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: kxorw %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: ## kill: %AX %AX %EAX +; AVX512BW-NEXT: retq +; +; 
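; NOTE (editorial annotation, not part of the committed patch): the kmov
; selection in these mask-op tests follows the ISA split for GPR<->mask
; moves: KMOVW needs only AVX512F, KMOVB needs AVX512DQ, and the GPR forms of
; KMOVD/KMOVQ need AVX512BW. After this change, targets with AVX512BW (the
; SKX and AVX512BW prefixes) prefer the 32-bit KMOVD even for i8/i16 masks,
; and DQ-only targets move GPR values into mask registers with the AVX512F
; KMOVW instead of KMOVB (a KMOVB read survives only where the zero-extended
; i8 result is wanted, as in mask8_zext above). A sketch of how the variants
; diverge for an 8-bit mask xor, modeled on the v8i1 add/sub tests just
; below (function name hypothetical, not from the patch):
;
;   define i8 @sketch_v8i1_xor(i8 %x, i8 %y) {
;     %m0 = bitcast i8 %x to <8 x i1>
;     %m1 = bitcast i8 %y to <8 x i1>
;     %m2 = xor <8 x i1> %m0, %m1
;     %r = bitcast <8 x i1> %m2 to i8
;     ret i8 %r
;   }
;
; Expected moves, by analogy with the checks in this file:
;   SKX:      kmovd %edi, %k0 ... kxorb ... kmovd %k0, %eax  (AVX512BW)
;   AVX512DQ: kmovw %edi, %k0 ... kxorb ... kmovw %k0, %eax  (no AVX512BW)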
AVX512DQ-LABEL: test_v16i1_sub:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k1
+; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: ## kill: %AX %AX %EAX
+; AVX512DQ-NEXT: retq
%m0 = bitcast i16 %x to <16 x i1>
%m1 = bitcast i16 %y to <16 x i1>
%m2 = sub <16 x i1> %m0, %m1
@@ -3659,13 +3803,41 @@
}
define i16 @test_v16i1_mul(i16 %x, i16 %y) {
-; CHECK-LABEL: test_v16i1_mul:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: kandw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; KNL-LABEL: test_v16i1_mul:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kmovw %esi, %k1
+; KNL-NEXT: kandw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AX %AX %EAX
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v16i1_mul:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: kmovd %esi, %k1
+; SKX-NEXT: kandw %k1, %k0, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AX %AX %EAX
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: test_v16i1_mul:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: kandw %k1, %k0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: ## kill: %AX %AX %EAX
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: test_v16i1_mul:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: kmovw %edi, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k1
+; AVX512DQ-NEXT: kandw %k1, %k0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: ## kill: %AX %AX %EAX
+; AVX512DQ-NEXT: retq
%m0 = bitcast i16 %x to <16 x i1>
%m1 = bitcast i16 %y to <16 x i1>
%m2 = mul <16 x i1> %m0, %m1
@@ -3680,30 +3852,34 @@
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AL %AL %EAX
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_add:
; SKX: ## BB#0:
-; SKX-NEXT: kmovb %edi, %k0
-; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AL %AL %EAX
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_add:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovw %edi, %k0
-; AVX512BW-NEXT: kmovw %esi, %k1
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: ## kill: %AL %AL %EAX
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_add:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: kmovb %edi, %k0
-; AVX512DQ-NEXT: kmovb %esi, %k1
+; AVX512DQ-NEXT: kmovw %edi, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, %eax
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
; AVX512DQ-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
@@ -3719,30 +3895,34 @@
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AL %AL %EAX
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_sub:
; SKX: ## BB#0:
-; SKX-NEXT: kmovb %edi, %k0
-; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AL %AL %EAX
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_sub:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovw %edi, %k0
-; AVX512BW-NEXT: kmovw %esi, %k1
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: ## kill: %AL %AL %EAX
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_sub:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: kmovb %edi, %k0
-; AVX512DQ-NEXT: kmovb %esi, %k1
+; AVX512DQ-NEXT: kmovw %edi, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, %eax
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
; AVX512DQ-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
@@ -3758,30 +3938,34 @@
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AL %AL %EAX
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_mul:
; SKX: ## BB#0:
-; SKX-NEXT: kmovb %edi, %k0
-; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kandb %k1, %k0, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AL %AL %EAX
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_mul:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovw %edi, %k0
-; AVX512BW-NEXT: kmovw %esi, %k1
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: ## kill: %AL %AL %EAX
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_mul:
; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: kmovb %edi, %k0
-; AVX512DQ-NEXT: kmovb %esi, %k1
+; AVX512DQ-NEXT: kmovw %edi, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kandb %k1, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, %eax
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: ## kill: %AL %AL %EAX
; AVX512DQ-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
Index: llvm/trunk/test/CodeGen/X86/avx512-regcall-Mask.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-regcall-Mask.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-regcall-Mask.ll
@@ -251,9 +251,9 @@
}
; CHECK-LABEL: test_argv16i1:
-; CHECK: kmovw %edx, %k{{[0-9]+}}
-; CHECK: kmovw %ecx, %k{{[0-9]+}}
-; CHECK: kmovw %eax, %k{{[0-9]+}}
+; CHECK: kmovd %edx, %k{{[0-9]+}}
+; CHECK: kmovd %ecx, %k{{[0-9]+}}
+; CHECK: kmovd %eax, %k{{[0-9]+}}
; CHECK: ret{{l|q}}
; Test regcall when receiving arguments of v16i1 type
@@ -301,9 +301,9 @@
}
; CHECK-LABEL: test_argv8i1:
-; CHECK: kmovw %edx, %k{{[0-9]+}}
-; CHECK: kmovw %ecx, %k{{[0-9]+}}
-; CHECK: kmovw %eax, %k{{[0-9]+}}
+; CHECK: kmovd %edx, %k{{[0-9]+}}
+; CHECK: kmovd %ecx, %k{{[0-9]+}}
+; CHECK: kmovd %eax, %k{{[0-9]+}}
; CHECK: ret{{l|q}}
; Test regcall when receiving arguments of v8i1 type
@@ -339,7 +339,7 @@
; CHECK-LABEL: caller_retv8i1:
; CHECK: call{{l|q}} {{_*}}test_retv8i1
-; CHECK: kmovw %eax, %k{{[0-9]+}}
+; CHECK: kmovd %eax, %k{{[0-9]+}}
; CHECK: ret{{l|q}}
; Test regcall when processing result of v8i1 type
Index: llvm/trunk/test/CodeGen/X86/avx512-select.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll
@@ -90,6 +90,7 @@
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq
%mask = load <8 x i1> , <8 x i1>* %m
%a = load <8 x i1> , <8 x i1>* %a.0
@@ -120,6 +121,7 @@
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq
%mask = load <8 x i1> , <8 x i1>* %m
%a = load <8 x i1> , <8 x i1>* %a.0
@@ -137,6 +139,7 @@
; CHECK-NEXT: kandw %k0, %k1, %k0
; CHECK-NEXT: korw %k2, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq
%mask = bitcast i8 %m to <8 x i1>
%a = bitcast i8 %a.0 to <8 x i1>
Index: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -166,6 +166,7 @@
; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: %AX %AX %EAX
; KNL-NEXT: retq
;
; SKX-LABEL: test12:
@@ -173,7 +174,8 @@
; SKX-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
; SKX-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; SKX-NEXT: kunpckbw %k0, %k1, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: ## kill: %AX %AX %EAX
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
Index: llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-vpermv3-commute.ll
@@ -53,7 +53,7 @@
define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpermi2d (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2p
@@ -66,7 +66,7 @@
define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpermi2pd (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2s = load double, double* %x2ptr
@@ -81,7 +81,7 @@
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
@@ -94,7 +94,7 @@
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
@@ -128,7 +128,7 @@
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
@@ -138,7 +138,7 @@
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast(<4 x i32> %x0, <4 x i32> %x1, i32* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpermi2d (%rdi){1to4}, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2s = load i32, i32* %x2ptr
@@ -164,7 +164,7 @@
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
@@ -296,7 +296,7 @@
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
@@ -306,7 +306,7 @@
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128_load:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpermi2b (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i8>, <16 x i8>* %x2p
Index: llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
@@ -157,7 +157,7 @@
define <16 x i32> @vpternlog_v16i32_012_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
@@ -167,7 +167,7 @@
define <16 x i32> @vpternlog_v16i32_102_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -178,7 +178,7 @@
define <16 x i32> @vpternlog_v16i32_210_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
@@ -189,7 +189,7 @@
define <16 x i32> @vpternlog_v16i32_012_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_mask1:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -202,7 +202,7 @@
define <16 x i32> @vpternlog_v16i32_012_mask2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_mask2:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $58, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
@@ -215,7 +215,7 @@
define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -228,7 +228,7 @@
define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask1:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -241,7 +241,7 @@
define <16 x i32> @vpternlog_v16i32_012_load0_mask2(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask2:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -255,7 +255,7 @@
define <16 x i32> @vpternlog_v16i32_012_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -266,7 +266,7 @@
define <16 x i32> @vpternlog_v16i32_012_load1_mask2(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_mask2:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -280,7 +280,7 @@
define <16 x i32> @vpternlog_v16i32_012_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
@@ -291,7 +291,7 @@
define <16 x i32> @vpternlog_v16i32_012_load2_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_mask1:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -305,7 +305,7 @@
define <16 x i32> @vpternlog_v16i32_102_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -316,7 +316,7 @@
define <16 x i32> @vpternlog_v16i32_102_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -329,7 +329,7 @@
define <16 x i32> @vpternlog_v16i32_102_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -341,7 +341,7 @@
define <16 x i32> @vpternlog_v16i32_210_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -353,7 +353,7 @@
define <16 x i32> @vpternlog_v16i32_210_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -365,7 +365,7 @@
define <16 x i32> @vpternlog_v16i32_210_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -378,7 +378,7 @@
define <16 x i32> @vpternlog_v16i32_021_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -391,7 +391,7 @@
define <16 x i32> @vpternlog_v16i32_021_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -402,7 +402,7 @@
define <16 x i32> @vpternlog_v16i32_021_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
@@ -413,7 +413,7 @@
define <16 x i32> @vpternlog_v16i32_012_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
@@ -423,7 +423,7 @@
define <16 x i32> @vpternlog_v16i32_102_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
@@ -433,7 +433,7 @@
define <16 x i32> @vpternlog_v16i32_210_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $78, %zmm0, %zmm2, %zmm1 {%k1} {z}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -444,7 +444,7 @@
define <16 x i32> @vpternlog_v16i32_012_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -455,7 +455,7 @@
define <16 x i32> @vpternlog_v16i32_012_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -466,7 +466,7 @@
define <16 x i32> @vpternlog_v16i32_012_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
@@ -477,7 +477,7 @@
define <16 x i32> @vpternlog_v16i32_102_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -488,7 +488,7 @@
define <16 x i32> @vpternlog_v16i32_102_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -499,7 +499,7 @@
define <16 x i32> @vpternlog_v16i32_102_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
@@ -510,7 +510,7 @@
define <16 x i32> @vpternlog_v16i32_210_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -521,7 +521,7 @@
define <16 x i32> @vpternlog_v16i32_210_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $92, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -532,7 +532,7 @@
define <16 x i32> @vpternlog_v16i32_210_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
@@ -543,7 +543,7 @@
define <16 x i32> @vpternlog_v16i32_021_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
@@ -554,7 +554,7 @@
define <16 x i32> @vpternlog_v16i32_021_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
@@ -565,7 +565,7 @@
define <16 x i32> @vpternlog_v16i32_021_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
@@ -684,7 +684,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -699,7 +699,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x1scalar = load i32, i32* %x1ptr
@@ -712,7 +712,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x2scalar = load i32, i32* %x2ptr
@@ -725,7 +725,7 @@
define <16 x i32> @vpternlog_v16i32_102_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
@@ -738,7 +738,7 @@
define <16 x i32> @vpternlog_v16i32_102_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -753,7 +753,7 @@
define <16 x i32> @vpternlog_v16i32_102_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -767,7 +767,7 @@
define <16 x i32> @vpternlog_v16i32_210_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -781,7 +781,7 @@
define <16 x i32> @vpternlog_v16i32_210_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -795,7 +795,7 @@
define <16 x i32> @vpternlog_v16i32_210_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -810,7 +810,7 @@
define <16 x i32> @vpternlog_v16i32_021_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -825,7 +825,7 @@
define <16 x i32> @vpternlog_v16i32_021_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast1_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x1scalar = load i32, i32* %x1ptr
@@ -838,7 +838,7 @@
define <16 x i32> @vpternlog_v16i32_021_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_mask:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x2scalar = load i32, i32* %x2ptr
@@ -851,7 +851,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
@@ -864,7 +864,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1scalar = load i32, i32* %x1ptr
@@ -877,7 +877,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2scalar = load i32, i32* %x2ptr
@@ -890,7 +890,7 @@
define <16 x i32> @vpternlog_v16i32_102_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
@@ -903,7 +903,7 @@
define <16 x i32> @vpternlog_v16i32_102_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1scalar = load i32, i32* %x1ptr
@@ -916,7 +916,7 @@
define <16 x i32> @vpternlog_v16i32_102_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2scalar = load i32, i32* %x2ptr
@@ -929,7 +929,7 @@
define <16 x i32> @vpternlog_v16i32_210_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
@@ -942,7 +942,7 @@
define <16 x i32> @vpternlog_v16i32_210_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $92, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1scalar = load i32, i32* %x1ptr
@@ -955,7 +955,7 @@
define <16 x i32> @vpternlog_v16i32_210_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $58, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2scalar = load i32, i32* %x2ptr
@@ -968,7 +968,7 @@
define <16 x i32> @vpternlog_v16i32_021_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $58, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
@@ -981,7 +981,7 @@
define <16 x i32> @vpternlog_v16i32_021_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast1_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1scalar = load i32, i32* %x1ptr
@@ -994,7 +994,7 @@
define <16 x i32> @vpternlog_v16i32_021_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_maskz:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2scalar = load i32, i32* %x2ptr
@@ -1007,7 +1007,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask1(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask1:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $92, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
@@ -1022,7 +1022,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask2(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask2:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -1038,7 +1038,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask2(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask2:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -1054,7 +1054,7 @@
define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask1(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_mask1:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
Index: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -2132,7 +2132,7 @@
define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovw %edi, %k1
+; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0
Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
@@ -24,13 +24,13 @@
; X32-LABEL: test_mm_mask_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
-; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastb %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastb_epi8:
; X64: # BB#0:
-; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastb %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
@@ -46,13 +46,13 @@
; X32-LABEL: test_mm_maskz_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
-; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastb_epi8:
; X64: # BB#0:
-; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
@@ -142,13 +142,13 @@
; X32-LABEL: test_mm_mask_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
-; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastw_epi16:
; X64: # BB#0:
-; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
@@ -164,13 +164,13 @@
; X32-LABEL: test_mm_maskz_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
-; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastw_epi16:
; X64: # BB#0:
-; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
@@ -201,13 +201,13 @@
; X32-LABEL: test_mm256_mask_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
-; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastw %xmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_broadcastw_epi16:
; X64: # BB#0:
-; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %xmm1, %ymm0 {%k1}
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
@@ -223,13 +223,13 @@
; X32-LABEL: test_mm256_maskz_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
-; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_broadcastw_epi16:
; X64: # BB#0:
-; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -27,7 +27,7 @@
; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
; CHECK-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc9]
@@ -47,7 +47,7 @@
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc9]
@@ -67,7 +67,7 @@
; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
; CHECK-NEXT: vpaddw %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc9]
@@ -126,7 +126,7 @@
define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
; CHECK-NEXT: vmovdqu %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -154,7 +154,7 @@
define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
; CHECK-NEXT: vmovdqu %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -168,7 +168,7 @@
define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -183,7 +183,7 @@
; CHECK-LABEL: test_int_x86_avx512_mask_loadu_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
-; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu16 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06]
; CHECK-NEXT: vmovdqu16 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f]
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
@@ -201,7 +201,7 @@
; CHECK-LABEL: test_int_x86_avx512_mask_loadu_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
-; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu16 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06]
; CHECK-NEXT: vmovdqu16 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f]
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
@@ -219,7 +219,7 @@
; CHECK-LABEL: test_int_x86_avx512_mask_loadu_b_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
-; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
; CHECK-NEXT: vmovdqu8 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06]
; CHECK-NEXT: vmovdqu8 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f]
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
@@ -256,7 +256,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x02]
; CHECK-NEXT: ## xmm3 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
; CHECK-NEXT: ## xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
@@ -302,7 +302,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xd0,0x03]
; CHECK-NEXT: ## xmm2 = xmm0[0,1,2,3,7,4,4,4]
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
@@ -325,7 +325,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xd0,0x03]
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
@@ -348,7 +348,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xd0,0x03]
; CHECK-NEXT: ## xmm2 = xmm0[3,0,0,0,4,5,6,7]
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
; CHECK-NEXT: ## xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
@@ -371,7 +371,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xd0,0x03]
; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
@@ -414,7 +414,8 @@
; CHECK-LABEL: test_pcmpeq_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AX %AX %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
@@ -423,9 +424,10 @@
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AX %AX %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
@@ -460,7 +462,8 @@
; CHECK-LABEL: test_pcmpgt_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AX %AX %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
@@ -469,9 +472,10 @@
define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AX %AX %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
@@ -486,7 +490,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
@@ -504,7 +508,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
@@ -558,7 +562,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
@@ -576,7 +580,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xd9]
; CHECK-NEXT: ## xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
@@ -594,7 +598,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
@@ -612,7 +616,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xd9]
; CHECK-NEXT: ## ymm3 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
@@ -635,7 +639,7 @@
define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rrk_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -646,7 +650,7 @@
define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rrkz_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
@@ -666,7 +670,7 @@
define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rmk_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -678,7 +682,7 @@
define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rmkz_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i16>, <8 x i16>* %ptr_b
@@ -700,7 +704,7 @@
define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rrk_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -711,7 +715,7 @@
define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rrkz_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
@@ -731,7 +735,7 @@
define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rmk_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -743,7 +747,7 @@
define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_add_epi16_rmkz_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i16>, <16 x i16>* %ptr_b
@@ -765,7 +769,7 @@
define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rrk_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -776,7 +780,7 @@
define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rrkz_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
@@ -796,7 +800,7 @@
define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rmk_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -808,7 +812,7 @@
define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rmkz_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i16>, <8 x i16>* %ptr_b
@@ -830,7 +834,7 @@
define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rrk_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -841,7 +845,7 @@
define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rrkz_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
@@ -861,7 +865,7 @@
define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rmk_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -873,7 +877,7 @@
define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_sub_epi16_rmkz_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i16>, <16 x i16>* %ptr_b
@@ -1090,7 +1094,7 @@
define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rrk_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1101,7 +1105,7 @@
define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
@@ -1121,7 +1125,7 @@
define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rmk_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1133,7 +1137,7 @@
define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i16>, <8 x i16>* %ptr_b
@@ -1155,7 +1159,7 @@
define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rrk_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1166,7 +1170,7 @@
define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rrkz_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
@@ -1186,7 +1190,7 @@
define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rmk_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1198,7 +1202,7 @@
define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi16_rmkz_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i16>, <16 x i16>* %ptr_b
@@ -1213,7 +1217,7 @@
define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
@@ -1246,7 +1250,7 @@
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xd9]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1261,7 +1265,7 @@
define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT:
kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] ; CHECK-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -1277,7 +1281,7 @@ define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] ; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] @@ -1310,7 +1314,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] ; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1325,7 +1329,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] ; CHECK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -1341,7 +1345,7 @@ define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] ; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] @@ -1374,7 +1378,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] ; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1389,7 +1393,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_w_256: ; CHECK: ## BB#0: 
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] ; CHECK-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -1405,7 +1409,7 @@ define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] ; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] @@ -1438,7 +1442,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] ; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1453,7 +1457,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] ; CHECK-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -1470,7 +1474,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] ; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xcb] @@ -1490,7 +1494,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] ; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xcb] @@ -1510,7 +1514,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_128: ; CHECK: ## 
BB#0: ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -1530,7 +1534,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -1550,7 +1554,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -1570,7 +1574,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -1590,7 +1594,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03] -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03] ; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca] @@ -1610,7 +1614,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03] -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03] ; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## EVEX TO 
VEX Compression encoding: [0xc5,0xf5,0xfd,0xca] @@ -1630,7 +1634,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsraw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x03] -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] ; CHECK-NEXT: vpsraw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] @@ -1650,7 +1654,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsraw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x03] -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] ; CHECK-NEXT: vpsraw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] @@ -1670,7 +1674,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x03] -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] @@ -1690,7 +1694,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x03] -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] @@ -1710,7 +1714,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1] ; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1743,7 +1747,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovzxbw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} ## encoding: 
[0x62,0xf2,0x7d,0x09,0x30,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0] @@ -1766,7 +1770,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovzxbw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xd0] ; CHECK-NEXT: ## ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0] @@ -1789,7 +1793,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xd0] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] @@ -1809,7 +1813,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xd0] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] @@ -1829,7 +1833,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xd0] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0] ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] @@ -1849,7 +1853,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xd0] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8] ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0] ; CHECK-NEXT: vpaddq %ymm0, 
%ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] @@ -1875,7 +1879,7 @@ define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1886,7 +1890,7 @@ define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) @@ -1906,7 +1910,7 @@ define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1918,7 +1922,7 @@ define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b @@ -1941,7 +1945,7 @@ define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1955,7 +1959,7 @@ define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -1979,7 +1983,7 @@ define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1990,7 +1994,7 @@ define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) @@ -2010,7 +2014,7 @@ define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2022,7 +2026,7 @@ define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b @@ -2045,7 +2049,7 @@ define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2059,7 +2063,7 @@ define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -2083,7 +2087,7 @@ define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2094,7 +2098,7 @@ 
define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) @@ -2114,7 +2118,7 @@ define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi16_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2126,7 +2130,7 @@ define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b @@ -2214,7 +2218,7 @@ define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2225,7 +2229,7 @@ define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) @@ -2245,7 +2249,7 @@ define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2257,7 +2261,7 @@ define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; 
CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b @@ -2280,7 +2284,7 @@ define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2294,7 +2298,7 @@ define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -2318,7 +2322,7 @@ define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2329,7 +2333,7 @@ define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) @@ -2349,7 +2353,7 @@ define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2361,7 +2365,7 @@ define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b @@ -2384,7 +2388,7 @@ define <16 x i16> 
@test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2398,7 +2402,7 @@ define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -2422,7 +2426,7 @@ define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2433,7 +2437,7 @@ define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) @@ -2453,7 +2457,7 @@ define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2465,7 +2469,7 @@ define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -204,29 +204,29 @@ define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: 
test_cmp_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x00] -; CHECK-NEXT: vpcmpltw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x01] -; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe9,0x02] -; CHECK-NEXT: vpcmpunordw %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf1,0x03] -; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf9,0x04] -; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x05] -; CHECK-NEXT: vpcmpnlew %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x06] -; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x00] +; CHECK-NEXT: vpcmpltw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x01] +; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02] +; CHECK-NEXT: vpcmpunordw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x03] +; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x04] +; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe9,0x05] +; CHECK-NEXT: vpcmpnlew %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf1,0x06] +; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf9,0x07] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 
0, i16 -1) @@ -251,30 +251,30 @@ define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { ; CHECK-LABEL: test_mask_cmp_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf] -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k4 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xe1,0x00] -; CHECK-NEXT: vpcmpltw %ymm1, %ymm0, %k5 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xe9,0x01] -; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k6 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xf1,0x02] -; CHECK-NEXT: vpcmpunordw %ymm1, %ymm0, %k7 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xf9,0x03] -; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xc1,0x04] -; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k2 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xd1,0x05] -; CHECK-NEXT: vpcmpnlew %ymm1, %ymm0, %k1 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xc9,0x06] -; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xd9,0x07] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x00] +; CHECK-NEXT: vpcmpltw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd1,0x01] +; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02] +; CHECK-NEXT: vpcmpunordw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x03] +; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x04] +; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf1,0x05] +; CHECK-NEXT: vpcmpnlew %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf9,0x06] +; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc9,0x07] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) @@ -301,29 +301,29 @@ define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: test_ucmp_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd9,0x00] -; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x01] -; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x02] -; CHECK-NEXT: vpcmpunorduw %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf1,0x03] -; CHECK-NEXT: vpcmpnequw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf9,0x04] -; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x06] -; CHECK-NEXT: vpcmporduw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpcmpequw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01] +; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02] +; CHECK-NEXT: vpcmpunorduw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd9,0x03] +; CHECK-NEXT: vpcmpnequw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x04] +; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x05] +; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf1,0x06] +; CHECK-NEXT: vpcmporduw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf9,0x07] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k6, %eax ## 
encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) @@ -348,30 +348,30 @@ define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { ; CHECK-LABEL: test_mask_ucmp_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf] -; CHECK-NEXT: vpcmpequw %ymm1, %ymm0, %k4 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xe1,0x00] -; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k5 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xe9,0x01] -; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k6 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xf1,0x02] -; CHECK-NEXT: vpcmpunorduw %ymm1, %ymm0, %k7 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xf9,0x03] -; CHECK-NEXT: vpcmpnequw %ymm1, %ymm0, %k0 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xc1,0x04] -; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k2 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xc9,0x06] -; CHECK-NEXT: vpcmporduw %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xd9,0x07] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpequw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01] +; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02] +; CHECK-NEXT: vpcmpunorduw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe1,0x03] +; CHECK-NEXT: vpcmpnequw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x04] +; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf1,0x05] +; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf9,0x06] +; CHECK-NEXT: vpcmporduw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x07] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) @@ -401,7 +401,8 @@ ; CHECK-LABEL: test_pcmpeq_b_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) ret i16 %res @@ -410,9 +411,10 @@ define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { ; CHECK-LABEL: test_mask_pcmpeq_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) ret i16 %res @@ -424,7 +426,8 @@ ; CHECK-LABEL: test_pcmpeq_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) ret i8 %res @@ -433,9 +436,10 @@ define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { ; CHECK-LABEL: test_mask_pcmpeq_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) ret i8 %res @@ -447,7 +451,8 @@ ; CHECK-LABEL: test_pcmpgt_b_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: 
retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) ret i16 %res @@ -456,9 +461,10 @@ define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { ; CHECK-LABEL: test_mask_pcmpgt_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) ret i16 %res @@ -470,7 +476,8 @@ ; CHECK-LABEL: test_pcmpgt_w_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) ret i8 %res @@ -479,9 +486,10 @@ define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { ; CHECK-LABEL: test_mask_pcmpgt_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) ret i8 %res @@ -492,29 +500,29 @@ define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_cmp_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x00] -; CHECK-NEXT: vpcmpltb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x01] -; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe9,0x02] -; CHECK-NEXT: vpcmpunordb %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf1,0x03] -; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf9,0x04] -; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x05] -; CHECK-NEXT: vpcmpnleb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x06] -; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x00] +; CHECK-NEXT: vpcmpltb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x01] +; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02] +; CHECK-NEXT: vpcmpunordb %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x03] +; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x04] +; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe9,0x05] +; CHECK-NEXT: vpcmpnleb 
%xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf1,0x06] +; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf9,0x07] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) @@ -539,30 +547,30 @@ define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { ; CHECK-LABEL: test_mask_cmp_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf] -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k4 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xe1,0x00] -; CHECK-NEXT: vpcmpltb %xmm1, %xmm0, %k5 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xe9,0x01] -; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k6 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xf1,0x02] -; CHECK-NEXT: vpcmpunordb %xmm1, %xmm0, %k7 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xf9,0x03] -; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xc1,0x04] -; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k2 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xd1,0x05] -; CHECK-NEXT: vpcmpnleb %xmm1, %xmm0, %k1 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xc9,0x06] -; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xd9,0x07] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x00] +; CHECK-NEXT: vpcmpltb %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd1,0x01] +; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} ## encoding: 
[0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02] +; CHECK-NEXT: vpcmpunordb %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x03] +; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x04] +; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf1,0x05] +; CHECK-NEXT: vpcmpnleb %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf9,0x06] +; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc9,0x07] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) @@ -589,29 +597,29 @@ define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_ucmp_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequb %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd9,0x00] -; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x01] -; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x02] -; CHECK-NEXT: vpcmpunordub %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf1,0x03] -; CHECK-NEXT: vpcmpnequb %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf9,0x04] -; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x06] -; CHECK-NEXT: vpcmpordub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpcmpequb %xmm1, %xmm0, %k0 ## encoding: 
[0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x00] +; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01] +; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02] +; CHECK-NEXT: vpcmpunordub %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd9,0x03] +; CHECK-NEXT: vpcmpnequb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x04] +; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x05] +; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf1,0x06] +; CHECK-NEXT: vpcmpordub %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf9,0x07] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) @@ -636,30 +644,30 @@ define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { ; CHECK-LABEL: test_mask_ucmp_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf] -; CHECK-NEXT: vpcmpequb %xmm1, %xmm0, %k4 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xe1,0x00] -; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k5 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xe9,0x01] -; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k6 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xf1,0x02] -; CHECK-NEXT: vpcmpunordub %xmm1, %xmm0, %k7 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xf9,0x03] -; CHECK-NEXT: vpcmpnequb %xmm1, %xmm0, %k0 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xc1,0x04] -; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k2 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xc9,0x06] -; CHECK-NEXT: 
vpcmpordub %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xd9,0x07] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpequb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x00] +; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01] +; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02] +; CHECK-NEXT: vpcmpunordub %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe1,0x03] +; CHECK-NEXT: vpcmpnequb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x04] +; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf1,0x05] +; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf9,0x06] +; CHECK-NEXT: vpcmpordub %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x07] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) @@ -686,29 +694,29 @@ define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_cmp_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x00] -; CHECK-NEXT: vpcmpltw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x01] -; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe9,0x02] -; CHECK-NEXT: vpcmpunordw %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf1,0x03] -; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k7 ## 
encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf9,0x04] -; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x05] -; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x06] -; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x00] +; CHECK-NEXT: vpcmpltw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x01] +; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x02] +; CHECK-NEXT: vpcmpunordw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x03] +; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x04] +; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe9,0x05] +; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf1,0x06] +; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf9,0x07] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) @@ -733,30 +741,30 @@ define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf] -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k4 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xe1,0x00] -; CHECK-NEXT: vpcmpltw %xmm1, %xmm0, %k5 {%k3} ## encoding: 
[0x62,0xf3,0xfd,0x0b,0x3f,0xe9,0x01] -; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k6 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xf1,0x02] -; CHECK-NEXT: vpcmpunordw %xmm1, %xmm0, %k7 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xf9,0x03] -; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xc1,0x04] -; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k2 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd1,0x05] -; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k1 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xc9,0x06] -; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd9,0x07] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x00] +; CHECK-NEXT: vpcmpltw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x01] +; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02] +; CHECK-NEXT: vpcmpunordw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x03] +; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x04] +; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf1,0x05] +; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf9,0x06] +; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc9,0x07] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, 
i32 0, i8 %mask) @@ -783,29 +791,29 @@ define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: test_ucmp_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x00] -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x01] -; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x02] -; CHECK-NEXT: vpcmpunorduw %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf1,0x03] -; CHECK-NEXT: vpcmpnequw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf9,0x04] -; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x06] -; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpcmpequw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x01] +; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x02] +; CHECK-NEXT: vpcmpunorduw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x03] +; CHECK-NEXT: vpcmpnequw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x04] +; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x05] +; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf1,0x06] +; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf9,0x07] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) @@ -830,30 +838,30 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf] -; CHECK-NEXT: vpcmpequw %xmm1, %xmm0, %k4 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xe1,0x00] -; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k5 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xe9,0x01] -; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k6 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xf1,0x02] -; CHECK-NEXT: vpcmpunorduw %xmm1, %xmm0, %k7 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xf9,0x03] -; CHECK-NEXT: vpcmpnequw %xmm1, %xmm0, %k0 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xc1,0x04] -; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k2 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xc9,0x06] -; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd9,0x07] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpcmpequw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01] +; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02] +; CHECK-NEXT: vpcmpunorduw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x03] +; CHECK-NEXT: vpcmpnequw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x04] +; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf1,0x05] +; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf9,0x06] +; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x07] +; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, 
%eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovd %k7, %eax ## encoding: [0xc5,0xfb,0x93,0xc7] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) @@ -889,7 +897,7 @@ define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -902,7 +910,7 @@ define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) @@ -924,7 +932,7 @@ define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -938,7 +946,7 @@ define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b @@ -963,7 +971,7 @@ define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -979,7 +987,7 @@ define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, 
%k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -1005,7 +1013,7 @@ define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1018,7 +1026,7 @@ define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) @@ -1040,7 +1048,7 @@ define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1054,7 +1062,7 @@ define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b @@ -1079,7 +1087,7 @@ define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1095,7 +1103,7 @@ define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi32_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -1121,7 +1129,7 @@ define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: 
test_mask_packs_epi16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1134,7 +1142,7 @@ define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) @@ -1156,7 +1164,7 @@ define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi16_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1170,7 +1178,7 @@ define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packs_epi16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b @@ -1268,7 +1276,7 @@ define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1281,7 +1289,7 @@ define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) @@ -1303,7 +1311,7 @@ define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1317,7 +1325,7 @@ define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b @@ -1342,7 +1350,7 @@ define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1358,7 +1366,7 @@ define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -1384,7 +1392,7 @@ define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1397,7 +1405,7 @@ define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) @@ -1419,7 +1427,7 @@ define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1433,7 +1441,7 @@ define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 
## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b @@ -1458,7 +1466,7 @@ define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1474,7 +1482,7 @@ define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi32_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b @@ -1500,7 +1508,7 @@ define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1513,7 +1521,7 @@ define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) @@ -1535,7 +1543,7 @@ define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1549,7 +1557,7 @@ define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_packus_epi16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b @@ -1646,7 +1654,7 @@ define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: 
test_mask_adds_epi16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1657,7 +1665,7 @@ define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { ; CHECK-LABEL: test_mask_adds_epi16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) @@ -1677,7 +1685,7 @@ define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_adds_epi16_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1689,7 +1697,7 @@ define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_adds_epi16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b @@ -1711,7 +1719,7 @@ define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi16_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1722,7 +1730,7 @@ define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi16_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) @@ -1742,7 +1750,7 @@ define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi16_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: 
[0x62,0xf1,0x7d,0x29,0xed,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1754,7 +1762,7 @@ define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi16_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b @@ -1776,7 +1784,7 @@ define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1787,7 +1795,7 @@ define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) @@ -1807,7 +1815,7 @@ define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1819,7 +1827,7 @@ define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b @@ -1841,7 +1849,7 @@ define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1852,7 +1860,7 @@ define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rrkz_256: ; CHECK: ## BB#0: -; 
CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) @@ -1872,7 +1880,7 @@ define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1884,7 +1892,7 @@ define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi16_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b @@ -1906,7 +1914,7 @@ define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1917,7 +1925,7 @@ define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) @@ -1937,7 +1945,7 @@ define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1949,7 +1957,7 @@ define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07] ; CHECK-NEXT: retq ## 
encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b @@ -1971,7 +1979,7 @@ define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1982,7 +1990,7 @@ define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) @@ -2002,7 +2010,7 @@ define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2014,7 +2022,7 @@ define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu16_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b @@ -2036,7 +2044,7 @@ define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2047,7 +2055,7 @@ define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) @@ -2067,7 +2075,7 @@ define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rmk_128: ; CHECK: ## 
BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2079,7 +2087,7 @@ define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b @@ -2101,7 +2109,7 @@ define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2112,7 +2120,7 @@ define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) @@ -2132,7 +2140,7 @@ define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2144,7 +2152,7 @@ define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu16_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b @@ -2166,7 +2174,7 @@ define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi8_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; 
CHECK-NEXT: retq ## encoding: [0xc3] @@ -2177,7 +2185,7 @@ define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi8_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) @@ -2197,7 +2205,7 @@ define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi8_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2209,7 +2217,7 @@ define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epi8_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b @@ -2296,7 +2304,7 @@ define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi8_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2307,7 +2315,7 @@ define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi8_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) @@ -2327,7 +2335,7 @@ define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi8_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2339,7 +2347,7 @@ define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epi8_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 
+; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b @@ -2426,7 +2434,7 @@ define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu8_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2437,7 +2445,7 @@ define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu8_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) @@ -2457,7 +2465,7 @@ define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu8_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2469,7 +2477,7 @@ define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_adds_epu8_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b @@ -2556,7 +2564,7 @@ define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu8_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2567,7 +2575,7 @@ define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu8_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) @@ -2587,7 +2595,7 @@ define <16 x 
i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu8_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2599,7 +2607,7 @@ define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { ; CHECK-LABEL: test_mask_subs_epu8_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b @@ -2679,11 +2687,11 @@ define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] -; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xda] -; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] -; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc1] +; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] +; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] +; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -2696,11 +2704,11 @@ define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] -; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xda] -; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] -; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc1] +; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] +; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca] +; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x 
i16> %x1, <8 x i16> %x2, i8 -1) @@ -2713,11 +2721,11 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] -; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xda] -; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] -; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc1] +; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] +; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] +; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -2730,11 +2738,11 @@ define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] -; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xda] -; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] -; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc1] +; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] +; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca] +; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -2747,11 +2755,11 @@ define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] -; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xda] -; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xca] -; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc1] +; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xda] +; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] +; 
CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -2764,11 +2772,11 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] -; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xda] -; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xca] -; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc1] +; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xda] +; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] +; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -2781,7 +2789,7 @@ define <16 x i8>@test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pavg_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] ; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] @@ -2813,7 +2821,7 @@ define <8 x i16>@test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pavg_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] ; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -2829,7 +2837,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pavg_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] ; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -2845,7 
+2853,7 @@ define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpabsb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] ; CHECK-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] @@ -2877,7 +2885,7 @@ define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpabsw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] ; CHECK-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] @@ -2893,7 +2901,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpabsw %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] ; CHECK-NEXT: vpabsw %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] @@ -2909,7 +2917,7 @@ define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] ; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -2925,7 +2933,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] ; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -2941,7 +2949,7 @@ define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmulh_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] ; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0xe5,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -2957,7 +2965,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmulh_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] ; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -2973,7 +2981,7 @@ define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] ; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -2989,7 +2997,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] ; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -3005,9 +3013,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpmovwb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] +; CHECK-NEXT: vpmovwb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] ; CHECK-NEXT: vpmovwb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0] ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] @@ -3025,7 +3033,7 @@ define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpmovwb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07] ; CHECK-NEXT: vpmovwb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3039,9 +3047,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> 
%x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpmovswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] +; CHECK-NEXT: vpmovswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] ; CHECK-NEXT: vpmovswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0] ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] @@ -3059,7 +3067,7 @@ define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07] ; CHECK-NEXT: vpmovswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3073,9 +3081,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] +; CHECK-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] ; CHECK-NEXT: vpmovuswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0] ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] @@ -3093,7 +3101,7 @@ define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07] ; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3107,9 +3115,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpmovwb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2] +; CHECK-NEXT: vpmovwb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] ; CHECK-NEXT: vpmovwb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xfc,0xc2] @@ -3127,7 +3135,7 @@ define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpmovwb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07] ; CHECK-NEXT: vpmovwb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3141,9 +3149,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpmovswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc2] +; CHECK-NEXT: vpmovswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] ; CHECK-NEXT: vpmovswb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0] ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] @@ -3161,7 +3169,7 @@ define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07] ; CHECK-NEXT: vpmovswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3175,9 +3183,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc2] +; CHECK-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] ; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0] ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] @@ -3195,7 +3203,7 @@ define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07] ; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3209,7 +3217,7 @@ define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: ; 
CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] @@ -3225,7 +3233,7 @@ define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] ; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1] ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] @@ -3241,7 +3249,7 @@ define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -3257,7 +3265,7 @@ define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -3273,12 +3281,12 @@ define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02] ; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] -; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x02] -; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x02] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xcb] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] +; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw 
%xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4) @@ -3293,12 +3301,12 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02] ; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] -; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x02] -; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x02] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4) @@ -3314,7 +3322,8 @@ ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) ret i16 %res @@ -3338,7 +3347,8 @@ ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) ret i8 %res @@ -3350,7 +3360,8 @@ ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) ret i16 %res @@ -3361,7 +3372,7 @@ define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7] +; CHECK-NEXT: kmovd %edi, %k0 ## encoding: 
[0xc5,0xfb,0x92,0xc7] ; CHECK-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0) @@ -3385,7 +3396,7 @@ define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7] +; CHECK-NEXT: kmovd %edi, %k0 ## encoding: [0xc5,0xfb,0x92,0xc7] ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0) @@ -3397,7 +3408,7 @@ define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7] +; CHECK-NEXT: kmovd %edi, %k0 ## encoding: [0xc5,0xfb,0x92,0xc7] ; CHECK-NEXT: vpmovm2w %k0, %ymm0 ## encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0) @@ -3410,7 +3421,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv16_hi: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x10,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -3430,7 +3441,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_hi: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x10,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -3450,7 +3461,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrav16_hi: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x11,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1] ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -3470,7 +3481,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_hi: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x11,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1] ; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -3490,7 +3501,7 @@ 
; CHECK-LABEL: test_int_x86_avx512_mask_psllv16_hi: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x12,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1] ; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] @@ -3510,7 +3521,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_hi: ; CHECK: ## BB#0: ; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x12,0xd9] -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1] ; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] @@ -3529,12 +3540,12 @@ define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8] ; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] -; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xd8] -; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xcb] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -3549,12 +3560,12 @@ define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8] ; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xd8] -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -3569,11 +3580,11 @@ define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] ; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3606,12 +3617,13 @@ define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) @@ -3624,11 +3636,11 @@ define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: 
[0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] ; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3643,11 +3655,11 @@ define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] ; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3680,12 +3692,13 @@ define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) @@ -3698,11 +3711,11 @@ define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] ; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3737,9 +3750,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] -; CHECK-NEXT: vpbroadcastb 
%dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] ; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] @@ -3757,9 +3770,9 @@ define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] ; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] @@ -3777,9 +3790,9 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] +; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] ; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] Index: llvm/trunk/test/CodeGen/X86/avx512cdvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512cdvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512cdvl-intrinsics.ll @@ -85,8 +85,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpconflictd %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vpconflictd %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpconflictd %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vpconflictd %xmm0, %xmm0 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 Index: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -7,7 +7,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512: ; CHECK: ## BB#0: ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 -; CHECK-NEXT: kmovb %edi, %k0 +; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kshiftlb $7, %k0, %k1 ; CHECK-NEXT: kshiftrb $7, %k1, %k1 ; CHECK-NEXT: 
kshiftlb $6, %k0, %k0 @@ -38,7 +38,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8: ; CHECK: ## BB#0: ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm2 -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 @@ -78,7 +78,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512: ; CHECK: ## BB#0: ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 @@ -118,7 +118,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512: ; CHECK: ## BB#0: ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3 -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 Index: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -6,7 +6,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vcvtpd2qq {rn-sae}, %zmm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -22,7 +22,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vcvtpd2uqq {rn-sae}, %zmm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -38,7 +38,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1} ; CHECK-NEXT: vcvtps2qq {rn-sae}, %ymm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -54,7 +54,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1} ; CHECK-NEXT: vcvtps2uqq {rn-sae}, %ymm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -70,7 +70,7 @@ define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 @@ -86,7 +86,7 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; 
CHECK-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 @@ -102,7 +102,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -118,7 +118,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -134,7 +134,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} ; CHECK-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -150,7 +150,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} ; CHECK-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 @@ -166,7 +166,7 @@ define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 @@ -182,7 +182,7 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 @@ -198,7 +198,7 @@ define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vreducepd $8, %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vreducepd $4, {sae}, %zmm0, %zmm0 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 @@ -230,7 +230,7 @@ define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1} ; CHECK-NEXT: vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 @@ -330,12 +330,13 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vfpclasspd $2, %zmm0, 
%k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %ecx
+; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: kmovw %k0, %eax
 ; CHECK-NEXT: addb %cl, %al
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq
 %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
 %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
@@ -348,9 +349,9 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vfpclassps $4, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
 ; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vfpclassps $4, %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
 ; CHECK-NEXT: addl %ecx, %eax
 ; CHECK-NEXT: ## kill: %AX %AX %EAX
@@ -452,6 +453,7 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpmovd2m %zmm0, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AX %AX %EAX
 ; CHECK-NEXT: retq
 %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
 ret i16 %res
@@ -463,7 +465,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpmovq2m %zmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq
 %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
 ret i8 %res
@@ -486,7 +489,7 @@
 define <8 x i64>@test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k0
+; CHECK-NEXT: kmovw %edi, %k0
 ; CHECK-NEXT: vpmovm2q %k0, %zmm0
 ; CHECK-NEXT: retq
 %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
@@ -534,7 +537,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: ## kill: %XMM0 %XMM0 %ZMM0
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
@@ -553,7 +556,7 @@
 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512_load(<2 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512_load:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
 ; CHECK-NEXT: vmovapd (%rdi), %xmm1
 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: retq
@@ -604,7 +607,7 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: ## kill: %XMM0 %XMM0 %ZMM0
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
@@ -623,7 +626,7 @@
 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512_load(<2 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512_load:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1
 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: retq
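Editor's note: throughout the test updates above, only the mask-move mnemonic changes (kmovb becomes kmovw here, and kmovd in the mask-op tests below); the surrounding instructions are untouched. A minimal standalone sketch, not LLVM code, of why the "## encoding:" comments in this patch differ by exactly one byte: the three forms differ only in the VEX.pp prefix-selection bits. The byte patterns are copied from encoding comments in these tests; the check logic is illustrative only.

#include <cassert>
#include <cstdint>

int main() {
  // Encodings taken from "## encoding:" comments in this patch:
  const uint8_t Kmovb[] = {0xc5, 0xf9, 0x92, 0xce}; // kmovb %esi, %k1 (VEX.66.0F 92)
  const uint8_t Kmovw[] = {0xc5, 0xf8, 0x92, 0xce}; // kmovw %esi, %k1 (VEX.0F 92)
  const uint8_t Kmovd[] = {0xc5, 0xfb, 0x92, 0xcf}; // kmovd %edi, %k1 (VEX.F2.0F 92)

  // In a two-byte VEX prefix, the low two bits of the second byte are VEX.pp:
  // 00 = no prefix (kmovw), 01 = 0x66 (kmovb), 11 = 0xF2 (kmovd).
  assert((Kmovw[1] & 0x3) == 0x0);
  assert((Kmovb[1] & 0x3) == 0x1);
  assert((Kmovd[1] & 0x3) == 0x3);
  // The remaining prefix bits and the 0x92 opcode are identical across all three.
  assert((Kmovb[1] | 0x3) == (Kmovw[1] | 0x3) && (Kmovw[1] | 0x3) == (Kmovd[1] | 0x3));
  assert(Kmovb[2] == 0x92 && Kmovw[2] == 0x92 && Kmovd[2] == 0x92);
  return 0;
}

So the mnemonic switches in these CHECK lines never change instruction length or operands; they only select a different width of the mask-move instruction.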
Index: llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
+++ llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
@@ -4,9 +4,10 @@
 define i8 @mask8(i8 %x) {
 ; CHECK-LABEL: mask8:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k0
+; CHECK-NEXT: kmovd %edi, %k0
 ; CHECK-NEXT: knotb %k0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq
 %m0 = bitcast i8 %x to <8 x i1>
 %m1 = xor <8 x i1> %m0,
@@ -55,7 +56,8 @@
 ; CHECK-NEXT: kandb %k1, %k0, %k2
 ; CHECK-NEXT: kxorb %k1, %k0, %k0
 ; CHECK-NEXT: korb %k0, %k2, %k0
-; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq
 %ma = load <8 x i1>, <8 x i1>* %x
 %mb = load <8 x i1>, <8 x i1>* %y
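Editor's note: in the dqvl hunks that follow, the diffs again touch only the kmov prefix byte; the EVEX-encoded vector instructions keep their bytes. The fourth EVEX byte is where the {%k1}, {z}, and {1toN} annotations live, which this standalone sketch decodes (again not LLVM code; the bytes are copied from "## encoding:" comments below, and the bit names follow the EVEX layout).

#include <cassert>
#include <cstdint>

int main() {
  // Fourth EVEX byte of four vandnps forms from the tests below:
  const uint8_t Masked      = 0x09; // vandnps %xmm1, %xmm0, %xmm2 {%k1}
  const uint8_t MaskedZero  = 0x89; // vandnps %xmm1, %xmm0, %xmm0 {%k1} {z}
  const uint8_t MaskedBcast = 0x19; // vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1}
  const uint8_t ZeroBcast   = 0x99; // vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z}

  assert((Masked & 0x07) == 1);            // EVEX.aaa = 001 selects %k1
  assert(MaskedZero == (Masked | 0x80));   // EVEX.z, bit 7: zeroing-masking
  assert(MaskedBcast == (Masked | 0x10));  // EVEX.b, bit 4: embedded broadcast
  assert(ZeroBcast == (Masked | 0x90));    // both bits together
  return 0;
}

This makes it straightforward to spot-check that the re-masked and reordered CHECK lines in this patch still assert the intended masking semantics.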
Index: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
@@ -13,7 +13,7 @@
 define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rrk_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -24,7 +24,7 @@
 define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rrkz_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
@@ -44,7 +44,7 @@
 define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmk_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -56,7 +56,7 @@
 define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmkz_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %b = load <4 x float>, <4 x float>* %ptr_b
@@ -79,7 +79,7 @@
 define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmbk_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; CHECK-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -93,7 +93,7 @@
 define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmbkz_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; CHECK-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %q = load float, float* %ptr_b
@@ -117,7 +117,7 @@
 define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rrk_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
 ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -128,7 +128,7 @@
 define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rrkz_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
@@ -148,7 +148,7 @@
 define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmk_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
 ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -160,7 +160,7 @@
 define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmkz_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %b = load <8 x float>, <8 x float>* %ptr_b
@@ -183,7 +183,7 @@
 define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmbk_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
 ; CHECK-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
 ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -197,7 +197,7 @@
 define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
 ; CHECK-LABEL: test_mask_andnot_ps_rmbkz_256:
 ;
CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b @@ -325,7 +325,7 @@ define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -336,7 +336,7 @@ define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) @@ -356,7 +356,7 @@ define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -368,7 +368,7 @@ define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b @@ -391,7 +391,7 @@ define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -405,7 +405,7 @@ define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b @@ -429,7 +429,7 @@ define <8 x float> 
@test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -440,7 +440,7 @@ define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) @@ -460,7 +460,7 @@ define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -472,7 +472,7 @@ define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b @@ -495,7 +495,7 @@ define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -509,7 +509,7 @@ define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_and_ps_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b @@ -637,7 +637,7 @@ define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: 
[0x62,0xf1,0x7c,0x09,0x56,0xd1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -648,7 +648,7 @@ define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) @@ -668,7 +668,7 @@ define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -680,7 +680,7 @@ define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b @@ -703,7 +703,7 @@ define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -717,7 +717,7 @@ define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b @@ -741,7 +741,7 @@ define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -752,7 +752,7 @@ define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: 
[0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) @@ -772,7 +772,7 @@ define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -784,7 +784,7 @@ define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b @@ -807,7 +807,7 @@ define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -821,7 +821,7 @@ define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_or_ps_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b @@ -949,7 +949,7 @@ define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -960,7 +960,7 @@ define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) @@ -980,7 +980,7 @@ define <4 x 
float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -992,7 +992,7 @@ define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b @@ -1015,7 +1015,7 @@ define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1029,7 +1029,7 @@ define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b @@ -1053,7 +1053,7 @@ define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1064,7 +1064,7 @@ define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) @@ -1084,7 +1084,7 @@ define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: 
[0x62,0xf1,0x7c,0x29,0x57,0x0f] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1096,7 +1096,7 @@ define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b @@ -1119,7 +1119,7 @@ define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1133,7 +1133,7 @@ define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_xor_ps_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b @@ -1261,7 +1261,7 @@ define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rrk_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1272,7 +1272,7 @@ define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rrkz_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) @@ -1292,7 +1292,7 @@ define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmk_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1304,7 +1304,7 @@ define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmkz_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 
## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i64>, <8 x i64>* %ptr_b @@ -1327,7 +1327,7 @@ define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmbk_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1341,7 +1341,7 @@ define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b @@ -1364,7 +1364,7 @@ define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rrk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1375,7 +1375,7 @@ define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rrkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) @@ -1395,7 +1395,7 @@ define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1407,7 +1407,7 @@ define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i64>, <4 x i64>* %ptr_b @@ -1430,7 +1430,7 @@ define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> 
%a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmbk_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1444,7 +1444,7 @@ define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b @@ -1468,7 +1468,7 @@ define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rrk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1479,7 +1479,7 @@ define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rrkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) @@ -1499,7 +1499,7 @@ define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1511,7 +1511,7 @@ define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <2 x i64>, <2 x i64>* %ptr_b @@ -1534,7 +1534,7 @@ define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmbk_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f] ; 
CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -1548,7 +1548,7 @@ define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) { ; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] +; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b @@ -1566,7 +1566,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01] -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01] ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01] ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca] @@ -1586,7 +1586,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01] -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01] ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01] ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xcb] @@ -1606,7 +1606,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256: ; CHECK: ## BB#0: ; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01] -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01] ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01] ; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb] Index: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -6,7 +6,7 @@ define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8] ; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0] ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] @@ -22,7 +22,7 @@ define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) { ; 
CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8] ; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0] ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] @@ -38,7 +38,7 @@ define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8] ; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0] ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] @@ -54,7 +54,7 @@ define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8] ; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0] ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] @@ -70,7 +70,7 @@ define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8] ; CHECK-NEXT: vcvtps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0] ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] @@ -86,7 +86,7 @@ define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8] ; CHECK-NEXT: vcvtps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0] ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] @@ -102,7 +102,7 @@ define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8] ; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0] ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] @@ -118,7 +118,7 @@ define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, 
i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8] ; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0] ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] @@ -134,7 +134,7 @@ define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8] ; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0] ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] @@ -150,7 +150,7 @@ define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8] ; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0] ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] @@ -166,7 +166,7 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8] ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0] ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] @@ -180,7 +180,7 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8] ; CHECK-NEXT: vmovq %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero @@ -200,7 +200,7 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8] ; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0] ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] @@ -216,7 +216,7 @@ define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) { ; 
CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
@@ -232,7 +232,7 @@
define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
@@ -248,7 +248,7 @@
define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
@@ -264,7 +264,7 @@
define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
@@ -280,7 +280,7 @@
define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvttps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
@@ -296,7 +296,7 @@
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
@@ -312,7 +312,7 @@
define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0]
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
@@ -328,7 +328,7 @@
define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0]
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
@@ -344,7 +344,7 @@
define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
@@ -358,7 +358,7 @@
define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; CHECK-NEXT: vmovq %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
; CHECK-NEXT: ## xmm1 = xmm1[0],zero
@@ -378,7 +378,7 @@
define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
@@ -394,7 +394,7 @@
define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
@@ -410,7 +410,7 @@
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
@@ -426,7 +426,7 @@
define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vreducepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; CHECK-NEXT: vreducepd $8, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
@@ -442,7 +442,7 @@
define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vreducepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; CHECK-NEXT: vreducepd $0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
@@ -458,7 +458,7 @@
define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vreduceps $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; CHECK-NEXT: vreduceps $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
@@ -474,7 +474,7 @@
define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vreduceps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; CHECK-NEXT: vreduceps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
@@ -490,7 +490,7 @@
define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; CHECK-NEXT: vrangepd $8, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
@@ -506,7 +506,7 @@
define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; CHECK-NEXT: vrangepd $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
@@ -522,7 +522,7 @@
define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; CHECK-NEXT: vrangeps $88, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
@@ -538,7 +538,7 @@
define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; CHECK-NEXT: vrangeps $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
@@ -554,12 +554,13 @@
define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
-; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vfpclassps $4, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc0,0x04]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 -1)
@@ -572,12 +573,13 @@
define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
-; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vfpclassps $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc0,0x04]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 -1)
@@ -590,12 +592,13 @@
define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
-; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc0,0x02]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 -1)
@@ -608,12 +611,13 @@
define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
-; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc0,0x04]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 -1)
@@ -626,7 +630,7 @@
define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x19,0xc8]
; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x19,0xd0]
@@ -648,7 +652,7 @@
define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vbroadcasti32x2 (%rsi), %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e]
; CHECK-NEXT: ## ymm1 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x59,0xd0]
@@ -673,7 +677,7 @@
define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x59,0xc8]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x59,0xd0]
; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
@@ -694,7 +698,8 @@
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
ret i8 %res
@@ -706,7 +711,8 @@
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
ret i8 %res
@@ -718,7 +724,8 @@
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
ret i8 %res
@@ -730,7 +737,8 @@
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
-; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
ret i8 %res
@@ -741,7 +749,7 @@
define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
+; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; CHECK-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
@@ -753,7 +761,7 @@
define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
+; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; CHECK-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
@@ -765,7 +773,7 @@
define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
+; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; CHECK-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
@@ -777,7 +785,7 @@
define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
+; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; CHECK-NEXT: vpmovm2q %k0, %ymm0 ## encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
@@ -790,7 +798,7 @@
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0 %XMM0 %YMM0
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xc8,0x00]
@@ -812,7 +820,7 @@
define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256_load(<2 x double>* %x0ptr, <4 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256_load:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vmovapd (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x0f]
; CHECK-NEXT: vshuff64x2 $0, %ymm1, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x23,0xc1,0x00]
; CHECK-NEXT: ## ymm0 {%k1} = ymm1[0,1,0,1]
@@ -829,7 +837,7 @@
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0 %XMM0 %YMM0
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xc8,0x00]
@@ -851,7 +859,7 @@
define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256_load(<2 x i64>* %x0ptr, <4 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256_load:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
+; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f]
; CHECK-NEXT: vshufi64x2 $0, %ymm1, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x43,0xc1,0x00]
; CHECK-NEXT: ## ymm0 {%k1} = ymm1[0,1,0,1]
Index: llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
@@ -8,14 +8,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
-; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm3
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
-; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm4
+; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1}
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: vpaddq %zmm2, %zmm4, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm3, %zmm1
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -36,14 +36,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
-; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm3
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
-; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm4
+; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm4 {%k1} {z}
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: vpaddq %zmm2, %zmm4, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm3, %zmm1
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -64,14 +64,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
-; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm3
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
-; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm4
+; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1}
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: vpaddq %zmm2, %zmm4, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm3, %zmm1
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -92,14 +92,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
-; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm3
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
-; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm4
+; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm4 {%k1} {z}
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: vpaddq %zmm2, %zmm4, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm4, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm3, %zmm1
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
Index: llvm/trunk/test/CodeGen/X86/avx512ifmavl-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512ifmavl-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512ifmavl-intrinsics.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl -mattr=+avx512ifma | FileCheck %s
@@ -8,14 +9,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %xmm0, %xmm3
-; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3
; CHECK-NEXT: vmovdqa %xmm0, %xmm4
-; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4
+; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1}
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm3, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -36,14 +37,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %ymm0, %ymm3
-; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3
; CHECK-NEXT: vmovdqa %ymm0, %ymm4
-; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4
+; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1}
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm3, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -64,14 +65,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %xmm0, %xmm3
-; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3
; CHECK-NEXT: vmovdqa %xmm0, %xmm4
-; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4
+; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4 {%k1} {z}
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm3, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -92,14 +93,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %ymm0, %ymm3
-; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3
; CHECK-NEXT: vmovdqa %ymm0, %ymm4
-; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4
+; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 {%k1} {z}
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm3, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -120,14 +121,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %xmm0, %xmm3
-; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3
; CHECK-NEXT: vmovdqa %xmm0, %xmm4
-; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4
+; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1}
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm3, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -148,14 +149,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %ymm0, %ymm3
-; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3
; CHECK-NEXT: vmovdqa %ymm0, %ymm4
-; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4
+; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1}
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm3, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -176,14 +177,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %xmm0, %xmm3
-; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3
; CHECK-NEXT: vmovdqa %xmm0, %xmm4
-; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4
+; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4 {%k1} {z}
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm3, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -204,14 +205,14 @@
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa %ymm0, %ymm3
-; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3
; CHECK-NEXT: vmovdqa %ymm0, %ymm4
-; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4
+; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 {%k1} {z}
; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
-; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
-; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm4, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm3, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
Index: llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
@@ -6,11 +6,11 @@
define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
-; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xd8]
-; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xc0]
-; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0]
+; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
+; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
@@ -46,11 +46,11 @@
define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x83,0xd9]
; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1]
-; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x83,0xd9]
-; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1]
-; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0]
+; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x83,0xc1]
+; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
@@ -86,14 +86,14 @@
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
-; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x75,0xda]
-; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x75,0xca]
+; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x75,0xda]
+; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
-; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc1]
-; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0]
+; CHECK-NEXT: vpaddb %xmm3, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
+; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
@@ -130,14 +130,14 @@
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
-; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xda]
-; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xca]
+; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda]
+; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
-; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc1]
-; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0]
+; CHECK-NEXT: vpaddb %xmm3, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
+; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
@@ -174,7 +174,7 @@
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -980,6 +980,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
@@ -991,6 +992,7 @@
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
@@ -1005,6 +1007,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
@@ -1018,6 +1021,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
@@ -1030,6 +1034,7 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
@@ -1041,6 +1046,7 @@
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
@@ -1055,6 +1061,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
@@ -1068,6 +1075,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
@@ -1082,6 +1090,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
@@ -1095,6 +1104,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
@@ -1111,6 +1121,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
@@ -1126,6 +1137,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res
@@ -1140,6 +1152,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
@@ -1153,6 +1166,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
@@ -1169,6 +1183,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
@@ -1184,6 +1199,7 @@
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res
Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -6,29 +6,29 @@
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x00]
-; CHECK-NEXT: vpcmpltd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x01]
-; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe9,0x02]
-; CHECK-NEXT: vpcmpunordd %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf1,0x03]
-; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf9,0x04]
-; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x05]
-; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x06]
-; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x07]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x00]
+; CHECK-NEXT: vpcmpltd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x01]
+; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
+; CHECK-NEXT: vpcmpunordd %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x03]
+; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe9,0x05]
+; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf1,0x06]
+; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf9,0x07]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
@@ -53,30 +53,30 @@
define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf]
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k4 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xe1,0x00]
-; CHECK-NEXT: vpcmpltd %ymm1, %ymm0, %k5 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xe9,0x01]
-; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k6 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xf1,0x02]
-; CHECK-NEXT: vpcmpunordd %ymm1, %ymm0, %k7 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xf9,0x03]
-; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xc1,0x04]
-; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k2 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd1,0x05]
-; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k1 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xc9,0x06]
-; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd9,0x07]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x00]
+; CHECK-NEXT: vpcmpltd %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x01]
+; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
+; CHECK-NEXT: vpcmpunordd %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x03]
+; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x04]
+; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf1,0x05]
+; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf9,0x06]
+; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc9,0x07]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
@@ -103,29 +103,29 @@
define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpequd %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x00]
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x01]
-; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x02]
-; CHECK-NEXT: vpcmpunordud %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf1,0x03]
-; CHECK-NEXT: vpcmpnequd %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf9,0x04]
-; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x05]
-; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x06]
-; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x07]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
+; CHECK-NEXT: vpcmpequd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x00]
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
+; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
+; CHECK-NEXT: vpcmpunordud %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x03]
+; CHECK-NEXT: vpcmpnequd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x05]
+; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf1,0x06]
+; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf9,0x07]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
@@ -150,30 +150,30 @@
define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_d_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k3 ## encoding: [0xc5,0xf8,0x92,0xdf]
-; CHECK-NEXT: vpcmpequd %ymm1, %ymm0, %k4 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xe1,0x00]
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k5 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xe9,0x01]
-; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k6 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xf1,0x02]
-; CHECK-NEXT: vpcmpunordud %ymm1, %ymm0, %k7 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xf9,0x03]
-; CHECK-NEXT: vpcmpnequd %ymm1, %ymm0, %k0 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xc1,0x04]
-; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k2 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd1,0x05]
-; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xc9,0x06]
-; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd9,0x07]
-; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x00]
+; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
+; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
+; CHECK-NEXT: vpcmpunordud %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x03]
+; CHECK-NEXT: vpcmpnequd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x04]
+; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf1,0x05]
+; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf9,0x06]
+; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x07]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
@@ -200,29 +200,29 @@
define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe9,0x00]
-; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf9,0x01]
-; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf1,0x02]
-; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x03]
-; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
-; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x05]
-; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x06]
-; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x07]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x00]
+; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x01]
+; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
+; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x03]
+; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe9,0x05]
+; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf1,0x06]
+; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf9,0x07]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
@@ -247,30 +247,30 @@
define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff]
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xe9,0x00]
-; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xc1,0x01]
-; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf1,0x02]
-; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xe1,0x03]
-; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xd9,0x04]
-; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xd1,0x05]
-; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xc9,0x06]
-; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf9,0x07]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x00]
+; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd1,0x01]
+; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02]
+; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x03]
+; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
+; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf1,0x05]
+; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf9,0x06]
+; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc9,0x07]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
@@ -297,29 +297,29 @@
define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x00]
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf9,0x01]
-; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf1,0x02]
-; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x03]
-; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd9,0x04]
-; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x05]
-; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x06]
-; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x07]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
+; CHECK-NEXT: vpcmpequq %ymm1,
%ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01] +; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02] +; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd9,0x03] +; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x04] +; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x05] +; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf1,0x06] +; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf9,0x07] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1) @@ -344,30 +344,30 @@ define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xe9,0x00] -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xc1,0x01] -; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf1,0x02] -; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xe1,0x03] -; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xd9,0x04] -; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xc9,0x06] -; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf9,0x07] -; 
CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01] +; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02] +; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe1,0x03] +; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x04] +; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x05] +; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf9,0x06] +; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc9,0x07] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask) @@ -396,29 +396,29 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_cmp_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe9,0x00] -; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf9,0x01] -; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf1,0x02] -; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x03] -; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04] -; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x05] -; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x06] -; CHECK-NEXT: vpcmpordd %xmm1, 
%xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x07] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x00] +; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x01] +; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02] +; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x03] +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x04] +; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe9,0x05] +; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf1,0x06] +; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf9,0x07] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) @@ -443,30 +443,30 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xe9,0x00] -; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xc1,0x01] -; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf1,0x02] -; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xe1,0x03] -; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xd9,0x04] -; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xd1,0x05] 
-; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xc9,0x06] -; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf9,0x07] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x00] +; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd1,0x01] +; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02] +; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x03] +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04] +; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf1,0x05] +; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf9,0x06] +; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc9,0x07] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) @@ -493,29 +493,29 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_ucmp_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x00] -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf9,0x01] -; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf1,0x02] -; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x03] -; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd9,0x04] -; CHECK-NEXT: 
vpcmpnltud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x06] -; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x07] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x00] +; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01] +; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02] +; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd9,0x03] +; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x04] +; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x05] +; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf1,0x06] +; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf9,0x07] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) @@ -540,30 +540,30 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xe9,0x00] -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xc1,0x01] -; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf1,0x02] -; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k4 {%k7} ## encoding: 
[0x62,0xf3,0x7d,0x0f,0x1e,0xe1,0x03] -; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xd9,0x04] -; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xc9,0x06] -; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf9,0x07] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x00] +; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01] +; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02] +; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe1,0x03] +; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x04] +; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x05] +; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf9,0x06] +; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc9,0x07] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) @@ -590,29 +590,29 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_cmp_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe9,0x00] -; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf9,0x01] -; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k6 ## encoding: 
[0x62,0xf3,0xfd,0x08,0x1f,0xf1,0x02] -; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x03] -; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04] -; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x05] -; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x06] -; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x07] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x00] +; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x01] +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02] +; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x03] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x04] +; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe9,0x05] +; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf1,0x06] +; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf9,0x07] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) @@ -637,30 +637,30 @@ define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xe9,0x00] -; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k0 {%k7} ## 
encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xc1,0x01] -; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf1,0x02] -; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xe1,0x03] -; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd9,0x04] -; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd1,0x05] -; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xc9,0x06] -; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf9,0x07] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x00] +; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd1,0x01] +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02] +; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x03] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04] +; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf1,0x05] +; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf9,0x06] +; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc9,0x07] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) @@ -687,29 +687,29 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_ucmp_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k5 ## 
encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x00] -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf9,0x01] -; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf1,0x02] -; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x03] -; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd9,0x04] -; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x06] -; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x07] -; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01] +; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02] +; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd9,0x03] +; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x04] +; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x05] +; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf1,0x06] +; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf9,0x07] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) @@ -734,30 +734,30 @@ define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_q_128: ; CHECK: ## BB#0: -; 
CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe9,0x00] -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xc1,0x01] -; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf1,0x02] -; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe1,0x03] -; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xd9,0x04] -; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xd1,0x05] -; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xc9,0x06] -; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf9,0x07] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x00] +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01] +; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02] +; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe1,0x03] +; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x04] +; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05] +; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06] +; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc9,0x07] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 
@llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) @@ -1498,6 +1498,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1) ret i8 %res @@ -1509,6 +1510,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1) ret i8 %res @@ -1520,6 +1522,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1) ret i8 %res @@ -1531,6 +1534,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT: ## kill: %AL %AL %EAX ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1) ret i8 %res @@ -1732,9 +1736,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] -; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xda] -; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] -; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc1] +; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda] +; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca] +; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) @@ -1749,9 +1753,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] -; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xda] -; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] -; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc1] +; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda] +; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca] +; CHECK-NEXT: vpaddd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, 
i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) @@ -1766,9 +1770,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] -; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xda] -; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] -; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc1] +; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda] +; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca] +; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -1783,9 +1787,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] -; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xda] -; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] -; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc1] +; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda] +; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca] +; CHECK-NEXT: vpaddd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -1800,9 +1804,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] -; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xda] -; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x77,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x77,0xda] +; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca] +; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) @@ -1817,9 +1821,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] -; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3 {%k1} ## encoding: 
[0x62,0xf2,0xfd,0x29,0x77,0xda] -; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xda] +; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca] +; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) @@ -1834,9 +1838,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] -; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xda] -; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda] +; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] +; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) @@ -1863,9 +1867,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] -; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xda] -; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xda] +; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca] +; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) @@ -2007,8 +2011,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpmovqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1] ; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2] +; CHECK-NEXT: vpmovqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1] ; CHECK-NEXT: vpmovqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc0] ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: 
[0xc5,0xf9,0xfc,0xc2]
@@ -2041,8 +2045,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
 ; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2]
+; CHECK-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
 ; CHECK-NEXT: vpmovsqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2075,8 +2079,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
 ; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2]
+; CHECK-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
 ; CHECK-NEXT: vpmovusqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2109,8 +2113,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
 ; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2]
+; CHECK-NEXT: vpmovqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
 ; CHECK-NEXT: vpmovqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2143,8 +2147,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
 ; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2]
+; CHECK-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
 ; CHECK-NEXT: vpmovsqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2177,8 +2181,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
 ; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2]
+; CHECK-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
 ; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2211,8 +2215,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
 ; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc2]
+; CHECK-NEXT: vpmovqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
 ; CHECK-NEXT: vpmovqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2245,8 +2249,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
 ; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc2]
+; CHECK-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
 ; CHECK-NEXT: vpmovsqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2279,8 +2283,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
 ; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc2]
+; CHECK-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
 ; CHECK-NEXT: vpmovusqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2313,8 +2317,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
 ; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc2]
+; CHECK-NEXT: vpmovqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
 ; CHECK-NEXT: vpmovqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2347,8 +2351,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
 ; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc2]
+; CHECK-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
 ; CHECK-NEXT: vpmovsqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2381,8 +2385,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
 ; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc2]
+; CHECK-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
 ; CHECK-NEXT: vpmovusqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2415,8 +2419,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
 ; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2]
+; CHECK-NEXT: vpmovqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
 ; CHECK-NEXT: vpmovqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0]
 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
@@ -2449,8 +2453,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
 ; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2]
+; CHECK-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
 ; CHECK-NEXT: vpmovsqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0]
 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
@@ -2483,8 +2487,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
 ; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2]
+; CHECK-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
 ; CHECK-NEXT: vpmovusqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc0]
 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
@@ -2517,8 +2521,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
 ; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc2]
+; CHECK-NEXT: vpmovqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
 ; CHECK-NEXT: vpmovqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0]
 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
@@ -2551,8 +2555,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
 ; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc2]
+; CHECK-NEXT: vpmovsqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
 ; CHECK-NEXT: vpmovsqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0]
 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
@@ -2585,8 +2589,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
 ; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc2]
+; CHECK-NEXT: vpmovusqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
 ; CHECK-NEXT: vpmovusqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0]
 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
@@ -2619,8 +2623,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
 ; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2]
+; CHECK-NEXT: vpmovdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
 ; CHECK-NEXT: vpmovdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2653,8 +2657,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
 ; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2]
+; CHECK-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
 ; CHECK-NEXT: vpmovsdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2687,8 +2691,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
 ; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2]
+; CHECK-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
 ; CHECK-NEXT: vpmovusdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2721,8 +2725,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
 ; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2]
+; CHECK-NEXT: vpmovdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
 ; CHECK-NEXT: vpmovdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2755,8 +2759,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
 ; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2]
+; CHECK-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
 ; CHECK-NEXT: vpmovsdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2789,8 +2793,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
 ; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2]
+; CHECK-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
 ; CHECK-NEXT: vpmovusdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc0]
 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1]
 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2]
@@ -2823,8 +2827,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
 ; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc2]
+; CHECK-NEXT: vpmovdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
 ; CHECK-NEXT: vpmovdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2857,8 +2861,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
 ; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc2]
+; CHECK-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
 ; CHECK-NEXT: vpmovsdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2891,8 +2895,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
 ; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc2]
+; CHECK-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
 ; CHECK-NEXT: vpmovusdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2925,8 +2929,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
 ; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc2]
+; CHECK-NEXT: vpmovdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2959,8 +2963,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
 ; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc2]
+; CHECK-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
 ; CHECK-NEXT: vpmovsdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -2993,8 +2997,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
 ; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc2]
+; CHECK-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
 ; CHECK-NEXT: vpmovusdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc0]
 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2]
@@ -3565,10 +3569,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
-; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd9,0x16]
 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
+; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
@@ -3588,10 +3592,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
-; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd9,0x16]
 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
+; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
+; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
 ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
@@ -3647,8 +3651,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
 ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0b]
+; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
 ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0b]
 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
@@ -3716,9 +3720,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
-; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
@@ -3733,9 +3737,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
-; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
@@ -3750,9 +3754,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
-; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
@@ -3767,9 +3771,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
-; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
@@ -3784,9 +3788,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
-; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
@@ -3801,9 +3805,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
-; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
@@ -3818,9 +3822,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
-; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
@@ -3835,9 +3839,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
-; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xda,0x21]
-; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21]
-; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
@@ -4282,11 +4286,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
 ; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x14,0xd9]
-; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1]
-; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xcb]
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
+; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
@@ -4302,11 +4306,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
 ; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xd9]
-; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1]
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xcb]
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
@@ -4322,11 +4326,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
 ; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x14,0xd9]
-; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x14,0xc1]
-; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xcb]
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
+; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
+; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
@@ -4342,11 +4346,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
 ; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xd9]
-; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1]
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb]
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
@@ -4442,11 +4446,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
 ; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
-; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x15,0xd9]
-; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1]
-; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xcb]
-; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
+; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
@@ -4462,11 +4466,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
 ; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xd9]
-; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1]
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xcb]
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
@@ -4482,11 +4486,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
 ; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x15,0xd9]
-; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1]
-; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xcb]
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
+; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
+; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
@@ -4502,11 +4506,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
 ; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xd9]
-; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1]
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb]
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
@@ -4602,11 +4606,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
 ; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
-; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xd8]
-; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
-; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xcb]
-; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
+; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
+; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
 %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
@@ -4622,11 +4626,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
 ; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
-; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xd8]
-; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xc0]
-; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb]
-; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
+; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
@@ -4642,11 +4646,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
-; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xd8]
-; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
-; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xcb]
-; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
+; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
+; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
 %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
@@ -4662,11 +4666,11 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
 ; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
-; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xd8]
-; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xc0]
-; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xcb]
-; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
@@ -4770,13 +4774,13 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
-; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
+; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05]
 ; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
+; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xe2,0x05]
 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
 ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
-; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4]
+; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0]
+; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
 %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
@@ -4793,13 +4797,13 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
-; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
+; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xda,0x05]
 ; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
+; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xe2,0x05]
 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
 ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
-; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
-; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4]
+; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd8,0x58,0xc0]
+; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
 %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
@@ -4816,13 +4820,13 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
-; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
+; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
 ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
+; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xe2,0x05]
 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
-; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
-; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4]
+; CHECK-NEXT: vaddps %ymm0, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
+; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
 %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
@@ -4839,13 +4843,13 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
-; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
+; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xda,0x05]
 ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
+; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xe2,0x05]
 ; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
 ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
-; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
-; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4]
+; CHECK-NEXT: vaddps %ymm0, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdc,0x58,0xc0]
+; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
 %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
@@ -4866,6 +4870,7 @@
 ; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
@@ -4879,11 +4884,12 @@
 ; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
 ; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
@@ -4902,6 +4908,7 @@
 ; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
@@ -4920,6 +4927,7 @@
 ; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
@@ -4938,6 +4946,7 @@
 ; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
@@ -4951,11 +4960,12 @@
 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
 ; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
+; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
@@ -4974,6 +4984,7 @@
 ; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
@@ -4992,6 +5003,7 @@
 ; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL %AL %EAX
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
@@ -5005,8 +5017,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastd %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xcf]
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastd %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xd7]
 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
@@ -5025,8 +5037,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastd %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xcf]
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastd %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xd7]
 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
@@ -5045,8 +5057,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastq %rdi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xcf]
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastq %rdi, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xd7]
 ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
 ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
@@ -5065,8 +5077,8 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastq %rdi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xcf]
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
 ; CHECK-NEXT: vpbroadcastq %rdi, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xd7]
 ; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
 ; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
@@ -5351,10 +5363,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
-; CHECK-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x98,0xd9]
-; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xca]
-; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1]
+; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xda]
+; CHECK-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
+; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
@@ -5368,10 +5380,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda]
-; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd9]
-; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xca]
-; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1]
+; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xda]
+; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
+; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
@@ -5386,9 +5398,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
-; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa8,0xda]
-; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xca]
-; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1]
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xda]
+; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa8,0xca]
+; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
@@ -5400,10 +5412,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
-; CHECK-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x98,0xd9]
-; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xca]
-; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1]
+; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xda]
+; CHECK-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
+; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
@@ -5417,10 +5429,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda]
-; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd9]
-; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xca]
-; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1]
+; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xda]
+; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
+; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
@@ -5435,9 +5447,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
-; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa8,0xda]
-; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xca]
-; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1]
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xda]
+; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa8,0xca]
+; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
 %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
@@ -5449,10 +5461,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
-; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xd9]
-; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xca]
-; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1]
+; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xda]
+; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
+; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
@@ -5466,10 +5478,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda]
-; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd9]
-; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xca]
-; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1]
+; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xda]
+; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
+; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
@@ -5484,9 +5496,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
-; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa8,0xda]
-; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xca]
-; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1]
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xda]
+; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa8,0xca]
+; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
@@ -5498,10 +5510,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
-; CHECK-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x98,0xd9]
-; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xca]
-; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1]
+; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
+; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xda]
+; CHECK-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
+; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
@@ -5515,10 +5527,10 @@
 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
-; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd9]
-; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xca]
-; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1]
+; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
+; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xda]
+; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
+; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
@@ -5533,9 +5545,9 @@
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
-; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa8,0xda]
-; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xca]
-; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX
Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xda] +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa8,0xca] +; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -5550,10 +5562,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] -; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd9] -; CHECK-NEXT: vfmsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaa,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] +; CHECK-NEXT: vfmsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaa,0xda] +; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1] +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -5568,10 +5580,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] -; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd9] -; CHECK-NEXT: vfmsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xaa,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] +; CHECK-NEXT: vfmsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xaa,0xda] +; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -5585,10 +5597,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] -; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd9] -; CHECK-NEXT: vfmsub213ps %xmm2, %xmm0, 
%xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaa,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] +; CHECK-NEXT: vfmsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaa,0xda] +; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1] +; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -5602,10 +5614,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] -; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd9] -; CHECK-NEXT: vfmsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xaa,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] +; CHECK-NEXT: vfmsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xaa,0xda] +; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -5714,10 +5726,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] -; CHECK-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] +; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xda] +; CHECK-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1] +; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -5731,10 +5743,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: 
vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] -; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] +; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xda] +; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1] +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -5746,10 +5758,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] -; CHECK-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] +; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xda] +; CHECK-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1] +; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -5763,10 +5775,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] -; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] +; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xda] +; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> 
@llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -5778,10 +5790,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] -; CHECK-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] +; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xda] +; CHECK-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1] +; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -5795,10 +5807,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] -; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] +; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xda] +; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1] +; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -5810,10 +5822,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] -; CHECK-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] +; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xda] +; CHECK-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1] +; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xfc,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -5827,10 +5839,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] -; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] +; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xda] +; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -5842,10 +5854,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] -; CHECK-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9c,0xd9] -; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xac,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] +; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xac,0xda] +; CHECK-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1] +; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -5857,10 +5869,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] -; CHECK-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9c,0xd9] -; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xac,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] +; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm0, 
%ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xac,0xda] +; CHECK-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1] +; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -5872,10 +5884,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] -; CHECK-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xd9] -; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xac,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] +; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xac,0xda] +; CHECK-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1] +; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -5887,10 +5899,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] -; CHECK-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xd9] -; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xac,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] +; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xac,0xda] +; CHECK-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1] +; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -5950,10 +5962,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] -; CHECK-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xca] -; 
CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xda] +; CHECK-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1] +; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -5967,10 +5979,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] -; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xda] +; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1] +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -5985,9 +5997,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] -; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xda] +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa6,0xca] +; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -5999,10 +6011,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] -; CHECK-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: 
[0x62,0xf2,0xed,0x29,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xda] +; CHECK-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1] +; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -6016,10 +6028,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] -; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xda] +; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -6034,9 +6046,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] -; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xda] +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa6,0xca] +; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -6048,10 +6060,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps 
%xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] -; CHECK-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xda] +; CHECK-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1] +; CHECK-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -6065,10 +6077,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] -; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xda] +; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1] +; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -6083,9 +6095,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] -; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xda] +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa6,0xca] +; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -6097,10 +6109,10 @@ ; CHECK-LABEL: 
test_int_x86_avx512_mask_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] -; CHECK-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xda] +; CHECK-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1] +; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -6114,10 +6126,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] -; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xda] +; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -6132,9 +6144,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] -; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xda] +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa6,0xca] +; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> 
@llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -6148,10 +6160,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] -; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa7,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] +; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa7,0xda] +; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1] +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -6165,10 +6177,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] -; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa7,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] +; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa7,0xda] +; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -6182,10 +6194,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] -; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa7,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] +; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa7,0xda] +; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1] +; 
CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -6199,10 +6211,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] -; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa7,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] +; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa7,0xda] +; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) Index: llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-logic.ll @@ -228,7 +228,7 @@ ; ; SKX-LABEL: test_mm256_mask_andnot_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnpd %ymm2, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -252,7 +252,7 @@ ; ; SKX-LABEL: test_mm256_maskz_andnot_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -276,7 +276,7 @@ ; ; SKX-LABEL: test_mm_mask_andnot_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnpd %xmm2, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -300,7 +300,7 @@ ; ; SKX-LABEL: test_mm_maskz_andnot_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -324,7 +324,7 @@ ; ; SKX-LABEL: test_mm256_mask_andnot_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnps %ymm2, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -347,7 +347,7 @@ ; ; SKX-LABEL: test_mm256_maskz_andnot_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -370,7 +370,7 @@ ; ; SKX-LABEL: test_mm_mask_andnot_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandnps %xmm2, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -394,7 +394,7 @@ ; ; SKX-LABEL: test_mm_maskz_andnot_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: 
vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -418,7 +418,7 @@ ; ; SKX-LABEL: test_mm256_mask_and_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %ymm1, %ymm2, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -441,7 +441,7 @@ ; ; SKX-LABEL: test_mm256_maskz_and_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %ymm0, %ymm1, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -464,7 +464,7 @@ ; ; SKX-LABEL: test_mm_mask_and_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %xmm1, %xmm2, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -487,7 +487,7 @@ ; ; SKX-LABEL: test_mm_maskz_and_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandpd %xmm0, %xmm1, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -510,7 +510,7 @@ ; ; SKX-LABEL: test_mm256_mask_and_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %ymm1, %ymm2, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -532,7 +532,7 @@ ; ; SKX-LABEL: test_mm256_maskz_and_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %ymm0, %ymm1, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -554,7 +554,7 @@ ; ; SKX-LABEL: test_mm_mask_and_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %xmm1, %xmm2, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -577,7 +577,7 @@ ; ; SKX-LABEL: test_mm_maskz_and_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vandps %xmm0, %xmm1, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -600,7 +600,7 @@ ; ; SKX-LABEL: test_mm256_mask_xor_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorpd %ymm2, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -623,7 +623,7 @@ ; ; SKX-LABEL: test_mm256_maskz_xor_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -646,7 +646,7 @@ ; ; SKX-LABEL: test_mm_mask_xor_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorpd %xmm2, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -669,7 +669,7 @@ ; ; SKX-LABEL: test_mm_maskz_xor_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -692,7 +692,7 @@ ; ; SKX-LABEL: test_mm256_mask_xor_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -714,7 +714,7 @@ ; ; SKX-LABEL: test_mm256_maskz_xor_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -736,7 +736,7 @@ ; ; SKX-LABEL: test_mm_mask_xor_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps %xmm2, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -759,7 +759,7 @@ ; ; SKX-LABEL: test_mm_maskz_xor_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -782,7 +782,7 @@ ; ; SKX-LABEL: test_mm256_mask_or_pd: ; SKX: ## BB#0: ## %entry 
-; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorpd %ymm1, %ymm2, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -805,7 +805,7 @@ ; ; SKX-LABEL: test_mm256_maskz_or_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -828,7 +828,7 @@ ; ; SKX-LABEL: test_mm_mask_or_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorpd %xmm1, %xmm2, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -851,7 +851,7 @@ ; ; SKX-LABEL: test_mm_maskz_or_pd: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -874,7 +874,7 @@ ; ; SKX-LABEL: test_mm256_mask_or_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorps %ymm1, %ymm2, %ymm0 {%k1} ; SKX-NEXT: retq entry: @@ -896,7 +896,7 @@ ; ; SKX-LABEL: test_mm256_maskz_or_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 {%k1} {z} ; SKX-NEXT: retq entry: @@ -918,7 +918,7 @@ ; ; SKX-LABEL: test_mm_mask_or_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorps %xmm1, %xmm2, %xmm0 {%k1} ; SKX-NEXT: retq entry: @@ -941,7 +941,7 @@ ; ; SKX-LABEL: test_mm_maskz_or_ps: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 {%k1} {z} ; SKX-NEXT: retq entry: Index: llvm/trunk/test/CodeGen/X86/combine-testm-and.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-testm-and.ll +++ llvm/trunk/test/CodeGen/X86/combine-testm-and.ll @@ -17,7 +17,7 @@ define i32 @combineTESTM_AND_2(<8 x i64> %a, <8 x i64> %b , i8 %mask) { ; CHECK-LABEL: combineTESTM_AND_2: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovb %k0, %eax ; CHECK-NEXT: vzeroupper @@ -31,7 +31,7 @@ define i32 @combineTESTM_AND_mask_3(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) { ; CHECK-LABEL: combineTESTM_AND_mask_3: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1} ; CHECK-NEXT: kmovb %k0, %eax ; CHECK-NEXT: vzeroupper @@ -46,7 +46,7 @@ define i32 @combineTESTM_AND_mask_4(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) { ; CHECK-LABEL: combineTESTM_AND_mask_4: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1} ; CHECK-NEXT: kmovb %k0, %eax ; CHECK-NEXT: vzeroupper Index: llvm/trunk/test/CodeGen/X86/compress_expand.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/compress_expand.ll +++ llvm/trunk/test/CodeGen/X86/compress_expand.ll @@ -8,23 +8,37 @@ define <16 x float> @test1(float* %base) { -; ALL-LABEL: test1: -; ALL: # BB#0: -; ALL-NEXT: movw $-2049, %ax # imm = 0xF7FF -; ALL-NEXT: kmovw %eax, %k1 -; ALL-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} -; ALL-NEXT: retq +; SKX-LABEL: test1: +; SKX: # BB#0: +; SKX-NEXT: movw $-2049, %ax # imm = 0xF7FF +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq +; +; KNL-LABEL: test1: +; KNL: # BB#0: +; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: 
vexpandps (%rdi), %zmm0 {%k1} {z} +; KNL-NEXT: retq %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> , <16 x float> undef) ret <16 x float>%res } define <16 x float> @test2(float* %base, <16 x float> %src0) { -; ALL-LABEL: test2: -; ALL: # BB#0: -; ALL-NEXT: movw $30719, %ax # imm = 0x77FF -; ALL-NEXT: kmovw %eax, %k1 -; ALL-NEXT: vexpandps (%rdi), %zmm0 {%k1} -; ALL-NEXT: retq +; SKX-LABEL: test2: +; SKX: # BB#0: +; SKX-NEXT: movw $30719, %ax # imm = 0x77FF +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: vexpandps (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq +; +; KNL-LABEL: test2: +; KNL: # BB#0: +; KNL-NEXT: movw $30719, %ax # imm = 0x77FF +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: vexpandps (%rdi), %zmm0 {%k1} +; KNL-NEXT: retq %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> , <16 x float> %src0) ret <16 x float>%res } @@ -52,7 +66,7 @@ ; SKX-LABEL: test4: ; SKX: # BB#0: ; SKX-NEXT: movb $7, %al -; SKX-NEXT: kmovb %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vexpandps (%rdi), %xmm0 {%k1} ; SKX-NEXT: retq ; @@ -72,7 +86,7 @@ ; SKX-LABEL: test5: ; SKX: # BB#0: ; SKX-NEXT: movb $2, %al -; SKX-NEXT: kmovb %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vpexpandq (%rdi), %xmm0 {%k1} ; SKX-NEXT: retq ; @@ -97,7 +111,7 @@ ; SKX-LABEL: test6: ; SKX: # BB#0: ; SKX-NEXT: movw $-2049, %ax # imm = 0xF7FF -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/fast-isel-load-i1.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-load-i1.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-load-i1.ll @@ -4,7 +4,9 @@ define i1 @test_i1(i1* %b) { ; CHECK-LABEL: test_i1: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: testb $1, (%rdi) +; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # BB#1: # %in ; CHECK-NEXT: xorl %eax, %eax Index: llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll @@ -6,21 +6,12 @@ ; condition input (argument or cmp). Currently i8 is not supported.
define zeroext i16 @select_cmov_i16(i1 zeroext %cond, i16 zeroext %a, i16 zeroext %b) { -; NOAVX512-LABEL: select_cmov_i16: -; NOAVX512: ## BB#0: -; NOAVX512-NEXT: testb $1, %dil -; NOAVX512-NEXT: cmovew %dx, %si -; NOAVX512-NEXT: movzwl %si, %eax -; NOAVX512-NEXT: retq -; -; AVX512-LABEL: select_cmov_i16: -; AVX512: ## BB#0: -; AVX512-NEXT: kmovw %edi, %k0 -; AVX512-NEXT: kmovw %k0, %eax -; AVX512-NEXT: testb $1, %al -; AVX512-NEXT: cmovew %dx, %si -; AVX512-NEXT: movzwl %si, %eax -; AVX512-NEXT: retq +; CHECK-LABEL: select_cmov_i16: +; CHECK: ## BB#0: +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmovew %dx, %si +; CHECK-NEXT: movzwl %si, %eax +; CHECK-NEXT: retq %1 = select i1 %cond, i16 %a, i16 %b ret i16 %1 } @@ -38,21 +29,12 @@ } define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) { -; NOAVX512-LABEL: select_cmov_i32: -; NOAVX512: ## BB#0: -; NOAVX512-NEXT: testb $1, %dil -; NOAVX512-NEXT: cmovel %edx, %esi -; NOAVX512-NEXT: movl %esi, %eax -; NOAVX512-NEXT: retq -; -; AVX512-LABEL: select_cmov_i32: -; AVX512: ## BB#0: -; AVX512-NEXT: kmovw %edi, %k0 -; AVX512-NEXT: kmovw %k0, %eax -; AVX512-NEXT: testb $1, %al -; AVX512-NEXT: cmovel %edx, %esi -; AVX512-NEXT: movl %esi, %eax -; AVX512-NEXT: retq +; CHECK-LABEL: select_cmov_i32: +; CHECK: ## BB#0: +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmovel %edx, %esi +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: retq %1 = select i1 %cond, i32 %a, i32 %b ret i32 %1 } @@ -70,21 +52,12 @@ } define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) { -; NOAVX512-LABEL: select_cmov_i64: -; NOAVX512: ## BB#0: -; NOAVX512-NEXT: testb $1, %dil -; NOAVX512-NEXT: cmoveq %rdx, %rsi -; NOAVX512-NEXT: movq %rsi, %rax -; NOAVX512-NEXT: retq -; -; AVX512-LABEL: select_cmov_i64: -; AVX512: ## BB#0: -; AVX512-NEXT: kmovw %edi, %k0 -; AVX512-NEXT: kmovw %k0, %eax -; AVX512-NEXT: testb $1, %al -; AVX512-NEXT: cmoveq %rdx, %rsi -; AVX512-NEXT: movq %rsi, %rax -; AVX512-NEXT: retq +; CHECK-LABEL: select_cmov_i64: +; CHECK: ## BB#0: +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmoveq %rdx, %rsi +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: retq %1 = select i1 %cond, i64 %a, i64 %b ret i64 %1 } Index: llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll +++ llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll @@ -165,7 +165,7 @@ define <8 x double> @test12(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) { ; SKX-LABEL: test12: ; SKX: # BB#0: # %entry -; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ; SKX-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; SKX-NEXT: retq @@ -199,7 +199,7 @@ define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) { ; SKX-LABEL: test14: ; SKX: # BB#0: # %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; SKX-NEXT: retq @@ -219,7 +219,7 @@ define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) { ; SKX-LABEL: test15: ; SKX: # BB#0: # %entry -; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: kmovd %edi, %k1 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3 ; SKX-NEXT: vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1 ; SKX-NEXT: vmovaps %zmm1, %zmm3 {%k1} Index: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll +++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll @@ -360,7 +360,7 @@ ; ; SKX-LABEL: test7: ; SKX: # BB#0: -; SKX-NEXT: kmovb %esi, %k1 +; SKX-NEXT: kmovw %esi, %k1 ; SKX-NEXT: kmovw %k1, %k2 ; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2} ; SKX-NEXT: vmovdqa %ymm1, %ymm2 @@ -1452,7 +1452,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] ; SKX-NEXT: movb $3, %al -; SKX-NEXT: kmovb %eax, %k1 +; SKX-NEXT: kmovw %eax, %k1 ; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1} ; SKX-NEXT: retq ; @@ -1461,7 +1461,7 @@ ; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: movb $3, %cl -; SKX_32-NEXT: kmovb %ecx, %k1 +; SKX_32-NEXT: kmovw %ecx, %k1 ; SKX_32-NEXT: vgatherdps (%eax,%xmm1,4), %xmm0 {%k1} ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> @@ -1499,9 +1499,9 @@ ; SKX-LABEL: test28: ; SKX: # BB#0: ; SKX-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: movb $3, %al -; SKX-NEXT: kmovb %eax, %k1 +; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -1509,9 +1509,9 @@ ; SKX_32-LABEL: test28: ; SKX_32: # BB#0: ; SKX_32-NEXT: # kill: %XMM1 %XMM1 %YMM1 -; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: movb $3, %al -; SKX_32-NEXT: kmovb %eax, %k1 +; SKX_32-NEXT: kmovw %eax, %k1 +; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl Index: llvm/trunk/test/CodeGen/X86/masked_memop.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/masked_memop.ll +++ llvm/trunk/test/CodeGen/X86/masked_memop.ll @@ -697,7 +697,7 @@ ; SKX-LABEL: mload_constmask_v4f32: ; SKX: ## BB#0: ; SKX-NEXT: movb $13, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} ; SKX-NEXT: retq %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> , <4 x float> %dst) @@ -731,7 +731,7 @@ ; SKX-LABEL: mload_constmask_v4i32: ; SKX: ## BB#0: ; SKX-NEXT: movb $14, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} ; SKX-NEXT: retq %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> , <4 x i32> %dst) @@ -760,7 +760,7 @@ ; SKX-LABEL: mload_constmask_v8f32: ; SKX: ## BB#0: ; SKX-NEXT: movb $7, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovups (%rdi), %ymm0 {%k1} ; SKX-NEXT: retq %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> , <8 x float> %dst) @@ -785,7 +785,7 @@ ; SKX-LABEL: mload_constmask_v4f64: ; SKX: ## BB#0: ; SKX-NEXT: movb $7, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1} ; SKX-NEXT: retq %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> , <4 x double> %dst) @@ -817,7 +817,7 @@ ; SKX-LABEL: mload_constmask_v8i32: ; SKX: ## BB#0: ; SKX-NEXT: movb $-121, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} ; SKX-NEXT: retq %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 
x i1> , <8 x i32> %dst) @@ -845,7 +845,7 @@ ; SKX-LABEL: mload_constmask_v4i64: ; SKX: ## BB#0: ; SKX-NEXT: movb $9, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} ; SKX-NEXT: retq %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> , <4 x i64> %dst) @@ -861,12 +861,19 @@ ; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3] ; AVX-NEXT: retq ; -; AVX512-LABEL: mload_constmask_v8f64: -; AVX512: ## BB#0: -; AVX512-NEXT: movb $-121, %al -; AVX512-NEXT: kmovw %eax, %k1 -; AVX512-NEXT: vmovupd (%rdi), %zmm0 {%k1} -; AVX512-NEXT: retq +; AVX512F-LABEL: mload_constmask_v8f64: +; AVX512F: ## BB#0: +; AVX512F-NEXT: movb $-121, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: retq +; +; SKX-LABEL: mload_constmask_v8f64: +; SKX: ## BB#0: +; SKX-NEXT: movb $-121, %al +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1> , <8 x double> %dst) ret <8 x double> %res } @@ -889,7 +896,7 @@ ; SKX-LABEL: mload_constmask_v4f64_undef_passthrough: ; SKX: ## BB#0: ; SKX-NEXT: movb $7, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z} ; SKX-NEXT: retq %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> , <4 x double> undef) @@ -918,7 +925,7 @@ ; SKX-LABEL: mload_constmask_v4i64_undef_passthrough: ; SKX: ## BB#0: ; SKX-NEXT: movb $6, %al -; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z} ; SKX-NEXT: retq %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> , <4 x i64> undef) Index: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll +++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll @@ -136,13 +136,21 @@ } define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp { -; ALL-LABEL: merge_8f64_f64_1u3u5zu8: -; ALL: # BB#0: -; ALL-NEXT: movb $32, %al -; ALL-NEXT: kmovw %eax, %k0 -; ALL-NEXT: knotw %k0, %k1 -; ALL-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z} -; ALL-NEXT: retq +; AVX512F-LABEL: merge_8f64_f64_1u3u5zu8: +; AVX512F: # BB#0: +; AVX512F-NEXT: movb $32, %al +; AVX512F-NEXT: kmovw %eax, %k0 +; AVX512F-NEXT: knotw %k0, %k1 +; AVX512F-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: merge_8f64_f64_1u3u5zu8: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: movb $32, %al +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: knotw %k0, %k1 +; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8: ; X32-AVX512F: # BB#0: @@ -223,13 +231,21 @@ } define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp { -; ALL-LABEL: merge_8i64_i64_1u3u5zu8: -; ALL: # BB#0: -; ALL-NEXT: movb $32, %al -; ALL-NEXT: kmovw %eax, %k0 -; ALL-NEXT: knotw %k0, %k1 -; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z} -; ALL-NEXT: retq +; AVX512F-LABEL: merge_8i64_i64_1u3u5zu8: +; AVX512F: # BB#0: +; AVX512F-NEXT: movb $32, %al +; AVX512F-NEXT: kmovw %eax, %k0 +; AVX512F-NEXT: knotw %k0, %k1 +; AVX512F-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: merge_8i64_i64_1u3u5zu8: +; 
AVX512BW: # BB#0: +; AVX512BW-NEXT: movb $32, %al +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: knotw %k0, %k1 +; AVX512BW-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8: ; X32-AVX512F: # BB#0: @@ -446,13 +462,21 @@ } define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp { -; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: -; ALL: # BB#0: -; ALL-NEXT: movw $8240, %ax # imm = 0x2030 -; ALL-NEXT: kmovw %eax, %k0 -; ALL-NEXT: knotw %k0, %k1 -; ALL-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} -; ALL-NEXT: retq +; AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: +; AVX512F: # BB#0: +; AVX512F-NEXT: movw $8240, %ax # imm = 0x2030 +; AVX512F-NEXT: kmovw %eax, %k0 +; AVX512F-NEXT: knotw %k0, %k1 +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: movw $8240, %ax # imm = 0x2030 +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: knotw %k0, %k1 +; AVX512BW-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} +; AVX512BW-NEXT: retq ; ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF: ; X32-AVX512F: # BB#0: Index: llvm/trunk/test/CodeGen/X86/pr27591.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr27591.ll +++ llvm/trunk/test/CodeGen/X86/pr27591.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -o - -O0 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -8,11 +9,12 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: # implicit-def: %EDI +; CHECK-NEXT: # implicit-def: %EDI ; CHECK-NEXT: movb %al, %dil ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: kmovd %k0, %edi +; CHECK-NEXT: movb %dil, %al ; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: movzbl %al, %edi ; CHECK-NEXT: callq callee1 @@ -30,7 +32,7 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: # implicit-def: %EDI +; CHECK-NEXT: # implicit-def: %EDI ; CHECK-NEXT: movb %al, %dil ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k0 Index: llvm/trunk/test/CodeGen/X86/pr32241.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32241.ll +++ llvm/trunk/test/CodeGen/X86/pr32241.ll @@ -4,55 +4,39 @@ define i32 @_Z3foov() { ; CHECK-LABEL: _Z3foov: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: subl $20, %esp ; CHECK-NEXT: .Lcfi0: -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: subl $24, %esp -; CHECK-NEXT: .Lcfi1: -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .Lcfi2: -; CHECK-NEXT: .cfi_offset %ebx, -8 -; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: movw $10959, {{[0-9]+}}(%esp) # imm = 0x2ACF ; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376 ; CHECK-NEXT: movw $19417, {{[0-9]+}}(%esp) # imm = 0x4BD9 -; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: kmovb %eax, %k0 -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %cx +; CHECK-NEXT: kxnorw %k0, %k0, %k0 +; CHECK-NEXT: kshiftrw $15, %k0, %k0 +; CHECK-NEXT: testw %cx, %cx +; CHECK-NEXT: movl %eax, 
{{[0-9]+}}(%esp) # 4-byte Spill ; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%esp) # 2-byte Spill ; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # BB#1: # %lor.rhs +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_1: # %lor.rhs ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movb %al, %cl -; CHECK-NEXT: kmovb %ecx, %k0 +; CHECK-NEXT: kmovd %eax, %k0 ; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%esp) # 2-byte Spill ; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .LBB0_2: # %lor.end ; CHECK-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # 2-byte Reload +; CHECK-NEXT: kxnorw %k0, %k0, %k1 +; CHECK-NEXT: kshiftrw $15, %k1, %k1 ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovw %k0, %ecx -; CHECK-NEXT: andl $1, %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload -; CHECK-NEXT: subl %ecx, %edx -; CHECK-NEXT: setl %ah -; CHECK-NEXT: # implicit-def: %ECX -; CHECK-NEXT: movb %ah, %cl -; CHECK-NEXT: andl $1, %ecx -; CHECK-NEXT: kmovw %ecx, %k0 -; CHECK-NEXT: kmovb %k0, %ebx -; CHECK-NEXT: andb $1, %bl -; CHECK-NEXT: movzbl %bl, %ecx -; CHECK-NEXT: xorl $-1, %ecx -; CHECK-NEXT: cmpl $0, %ecx -; CHECK-NEXT: kmovb %eax, %k0 -; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%esp) # 2-byte Spill +; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%esp) # 2-byte Spill ; CHECK-NEXT: jne .LBB0_4 -; CHECK-NEXT: # BB#3: # %lor.rhs4 +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_3: # %lor.rhs4 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movb %al, %cl -; CHECK-NEXT: kmovb %ecx, %k0 +; CHECK-NEXT: kmovd %eax, %k0 ; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%esp) # 2-byte Spill ; CHECK-NEXT: jmp .LBB0_4 ; CHECK-NEXT: .LBB0_4: # %lor.end5 @@ -62,8 +46,7 @@ ; CHECK-NEXT: movw %ax, %cx ; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: addl $24, %esp -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: addl $20, %esp ; CHECK-NEXT: retl entry: %aa = alloca i16, align 2 Index: llvm/trunk/test/CodeGen/X86/pr32256.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32256.ll +++ llvm/trunk/test/CodeGen/X86/pr32256.ll @@ -7,47 +7,39 @@ define void @_Z1av() { ; CHECK-LABEL: _Z1av: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: subl $6, %esp ; CHECK-NEXT: .Lcfi0: -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: .Lcfi1: -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Lcfi2: -; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: .cfi_def_cfa_offset 10 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movb %al, %cl -; CHECK-NEXT: movb $1, %dl -; CHECK-NEXT: movb c, %ch +; CHECK-NEXT: kmovd %eax, %k0 +; CHECK-NEXT: movb c, %cl ; CHECK-NEXT: # implicit-def: %EAX -; CHECK-NEXT: movb %ch, %al +; CHECK-NEXT: movb %cl, %al ; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: kmovw %eax, %k0 -; CHECK-NEXT: kmovq %k0, %k1 -; CHECK-NEXT: kxnorw %k0, %k0, %k2 -; CHECK-NEXT: kshiftrw $15, %k2, %k2 -; CHECK-NEXT: kxorw %k2, %k0, %k0 -; CHECK-NEXT: kmovb %k0, %ebx -; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: kmovb %ecx, %k0 -; CHECK-NEXT: kmovw %k1, {{[0-9]+}}(%esp) # 2-byte Spill -; CHECK-NEXT: movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kmovq %k1, %k2 +; CHECK-NEXT: kxnorw %k0, %k0, %k3 +; CHECK-NEXT: kshiftrw $15, %k3, %k3 +; CHECK-NEXT: kxorw %k3, %k1, %k1 +; CHECK-NEXT: kmovd %k1, %eax +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: kmovw %k2, {{[0-9]+}}(%esp) # 2-byte Spill ; CHECK-NEXT: kmovw %k0, (%esp) # 2-byte Spill ; 
CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .LBB0_1: # %land.rhs ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movb %al, %cl -; CHECK-NEXT: kmovb %ecx, %k0 +; CHECK-NEXT: kmovd %eax, %k0 ; CHECK-NEXT: kmovw %k0, (%esp) # 2-byte Spill ; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .LBB0_2: # %land.end ; CHECK-NEXT: kmovw (%esp), %k0 # 2-byte Reload -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) -; CHECK-NEXT: addl $8, %esp -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: movb %al, %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) +; CHECK-NEXT: addl $6, %esp ; CHECK-NEXT: retl entry: %b = alloca i8, align 1 Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -422,7 +422,7 @@ ; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movw $-21846, %ax # imm = 0xAAAA -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -462,7 +462,7 @@ ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movw $-30584, %ax # imm = 0x8888 -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -521,7 +521,7 @@ ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movw $-28528, %ax # imm = 0x9090 -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -562,7 +562,7 @@ ; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movw $-21264, %ax # imm = 0xACF0 -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -718,7 +718,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movw $-32768, %ax # imm = 0x8000 -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -743,7 +743,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movw $1, %ax -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -768,7 +768,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: ; AVX512VL: # BB#0: 
; AVX512VL-NEXT: movw $21930, %ax # imm = 0x55AA -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -793,7 +793,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movw $-21931, %ax # imm = 0xAA55 -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -383,7 +383,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1] ; AVX512VL-NEXT: movw $1, %ax -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: retq @@ -414,7 +414,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1] ; AVX512VL-NEXT: movw $1, %ax -; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: kmovd %eax, %k1 ; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -262,12 +262,19 @@ } define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) { -; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u: -; ALL: # BB#0: -; ALL-NEXT: movw $8, %ax -; ALL-NEXT: kmovw %eax, %k1 -; ALL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; ALL-NEXT: retq +; AVX512F-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u: +; AVX512F: # BB#0: +; AVX512F-NEXT: movw $8, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: movw $8, %ax +; AVX512BW-NEXT: kmovd %eax, %k1 +; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: retq %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> ret <16 x i32> %c } @@ -398,12 +405,19 @@ } define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, <16 x i32> %passthru, i16 %mask) { -; ALL-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] -; ALL-NEXT: vmovdqa64 %zmm1, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] +; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01: 
+; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] +; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru @@ -411,12 +425,19 @@ } define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) { -; ALL-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1] -; ALL-NEXT: vmovdqa64 %zmm2, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1] +; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1] +; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru @@ -424,11 +445,17 @@ } define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, i16 %mask) { -; ALL-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] -; ALL-NEXT: retq +; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] +; AVX512BW-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer @@ -436,11 +463,17 @@ } define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, i16 %mask) { -; ALL-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1] -; ALL-NEXT: retq +; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, 
%k1 +; AVX512BW-NEXT: valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1] +; AVX512BW-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer @@ -510,12 +543,19 @@ } define <16 x float> @mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) { -; ALL-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} -; ALL-NEXT: vmovaps %zmm2, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} +; AVX512F-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} +; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 +; AVX512BW-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru @@ -523,12 +563,19 @@ } define <16 x float> @mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) { -; ALL-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} -; ALL-NEXT: vmovaps %zmm2, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} +; AVX512F-NEXT: vmovaps %zmm2, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 +; AVX512BW-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru @@ -536,12 +583,19 @@ } define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) { -; ALL-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} -; ALL-NEXT: vmovdqa64 %zmm2, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} +; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} +; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-NEXT: 
retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru @@ -549,12 +603,19 @@ } define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) { -; ALL-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: -; ALL: # BB#0: -; ALL-NEXT: kmovw %edi, %k1 -; ALL-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} -; ALL-NEXT: vmovdqa64 %zmm2, %zmm0 -; ALL-NEXT: retq +; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} +; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> %mask.cast = bitcast i16 %mask to <16 x i1> %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -10,7 +10,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX64-NEXT: movb $5, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -25,7 +25,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX32-NEXT: movb $5, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -44,7 +44,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX64-NEXT: movb $-86, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -61,7 +61,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX32-NEXT: movb $-86, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -83,7 +83,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX64-NEXT: movb $9, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -99,7 +99,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX32-NEXT: movb $9, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -120,7 +120,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX64-NEXT: movb $-127, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -135,7 +135,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX32-NEXT: movb $-127, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -155,7 +155,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX64-NEXT: movb $9, %al -; 
SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -171,7 +171,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX32-NEXT: movb $9, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -253,7 +253,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX64-NEXT: movw $1285, %ax # imm = 0x505 -; SKX64-NEXT: kmovw %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -269,7 +269,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX32-NEXT: movw $1285, %ax # imm = 0x505 -; SKX32-NEXT: kmovw %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -289,7 +289,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX64-NEXT: kmovw %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -305,7 +305,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX32-NEXT: kmovw %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -326,7 +326,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX64-NEXT: movb $-127, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -342,7 +342,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX32-NEXT: movb $-127, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -362,7 +362,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX64-NEXT: kmovw %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -378,7 +378,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX32-NEXT: kmovw %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -398,7 +398,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX64-NEXT: movb $-127, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -414,7 +414,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; SKX32-NEXT: movb $-127, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -505,7 +505,7 @@ ; SKX64: # BB#0: ; SKX64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX64-NEXT: movb $20, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} ; SKX64-NEXT: retq ; @@ -523,7 +523,7 @@ ; SKX32: # BB#0: ; SKX32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; SKX32-NEXT: movb $20, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} ; SKX32-NEXT: retl ; @@ -681,7 +681,7 @@ ; SKX64-LABEL: test_mm512_mask_blend_epi32: ; SKX64: # BB#0: # %entry ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX64-NEXT: kmovw %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; 
SKX64-NEXT: retq ; @@ -695,7 +695,7 @@ ; SKX32-LABEL: test_mm512_mask_blend_epi32: ; SKX32: # BB#0: # %entry ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX32-NEXT: kmovw %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; SKX32-NEXT: retl ; @@ -714,7 +714,7 @@ ; SKX64-LABEL: test_mm512_mask_blend_epi64: ; SKX64: # BB#0: # %entry ; SKX64-NEXT: movb $-86, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; SKX64-NEXT: retq ; @@ -728,7 +728,7 @@ ; SKX32-LABEL: test_mm512_mask_blend_epi64: ; SKX32: # BB#0: # %entry ; SKX32-NEXT: movb $-86, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; SKX32-NEXT: retl ; @@ -747,7 +747,7 @@ ; SKX64-LABEL: test_mm512_mask_blend_ps: ; SKX64: # BB#0: # %entry ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX64-NEXT: kmovw %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; SKX64-NEXT: retq ; @@ -761,7 +761,7 @@ ; SKX32-LABEL: test_mm512_mask_blend_ps: ; SKX32: # BB#0: # %entry ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX32-NEXT: kmovw %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; SKX32-NEXT: retl ; @@ -780,7 +780,7 @@ ; SKX64-LABEL: test_mm512_mask_blend_pd: ; SKX64: # BB#0: # %entry ; SKX64-NEXT: movb $-88, %al -; SKX64-NEXT: kmovb %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; SKX64-NEXT: retq ; @@ -794,7 +794,7 @@ ; SKX32-LABEL: test_mm512_mask_blend_pd: ; SKX32: # BB#0: # %entry ; SKX32-NEXT: movb $-88, %al -; SKX32-NEXT: kmovb %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; SKX32-NEXT: retl ; @@ -845,7 +845,7 @@ ; SKX64-LABEL: test_mm_mask_blend_epi8: ; SKX64: # BB#0: # %entry ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX64-NEXT: kmovw %eax, %k1 +; SKX64-NEXT: kmovd %eax, %k1 ; SKX64-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} ; SKX64-NEXT: retq ; @@ -858,7 +858,7 @@ ; SKX32-LABEL: test_mm_mask_blend_epi8: ; SKX32: # BB#0: # %entry ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA -; SKX32-NEXT: kmovw %eax, %k1 +; SKX32-NEXT: kmovd %eax, %k1 ; SKX32-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} ; SKX32-NEXT: retl ; Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -51,7 +51,7 @@ ; ; X64-LABEL: combine_permvar_8f64_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0] ; X64-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} ; X64-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8] @@ -89,7 +89,7 @@ ; ; X64-LABEL: combine_permvar_8i64_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0] ; X64-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} ; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8] @@ -126,7 +126,7 @@ ; ; X64-LABEL: combine_vpermt2var_8f64_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2 {%k1} {z} ; X64-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8] @@ 
-175,7 +175,7 @@ ; ; X64-LABEL: combine_vpermt2var_8f64_movddup_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; X64-NEXT: retq %res0 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> , <8 x double> %x0, <8 x double> %x1, i8 %m) @@ -207,7 +207,7 @@ ; ; X64-LABEL: combine_vpermt2var_8i64_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 {%k1} {z} ; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8] @@ -242,7 +242,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovaps {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2 {%k1} {z} ; X64-NEXT: vmovaps {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] @@ -299,7 +299,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_vmovddup_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovaps {{.*#+}} zmm2 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13] ; X64-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} {z} ; X64-NEXT: retq @@ -319,7 +319,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_vmovddup_mask_load: ; X64: # BB#0: -; X64-NEXT: kmovw %esi, %k1 +; X64-NEXT: kmovd %esi, %k1 ; X64-NEXT: vmovaps (%rdi), %zmm2 ; X64-NEXT: vmovaps {{.*#+}} zmm1 = [0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13] ; X64-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 {%k1} {z} @@ -367,7 +367,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_vmovshdup_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; X64-NEXT: retq %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m) @@ -411,7 +411,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_vmovsldup_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; X64-NEXT: retq %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m) @@ -427,7 +427,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_vmovsldup_mask_load: ; X64: # BB#0: -; X64-NEXT: kmovw %esi, %k1 +; X64-NEXT: kmovd %esi, %k1 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; X64-NEXT: retq %x0 = load <16 x float>, <16 x float> *%p0 @@ -472,7 +472,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_vpermilps_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] ; X64-NEXT: retq %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m) @@ -488,7 +488,7 @@ ; ; X64-LABEL: combine_vpermt2var_16f32_vpermilps_mask_load: ; X64: # BB#0: -; X64-NEXT: kmovw %esi, %k1 +; X64-NEXT: kmovd %esi, %k1 ; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] ; X64-NEXT: retq %x0 = load <16 x float>, <16 x float> *%p0 @@ -520,7 +520,7 @@ ; ; X64-LABEL: combine_vpermt2var_16i32_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovdqa32 
{{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 {%k1} {z} ; X64-NEXT: vmovdqa32 {{.*#+}} zmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] @@ -675,7 +675,7 @@ ; ; X64-LABEL: combine_permvar_8i64_as_permq_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4] ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ; X64-NEXT: retq @@ -707,7 +707,7 @@ ; ; X64-LABEL: combine_permvar_8f64_as_permpd_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4] ; X64-NEXT: vmovapd %zmm1, %zmm0 ; X64-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll @@ -30,7 +30,7 @@ ; ; X64-LABEL: combine_vpermt2var_16i16_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovdqu {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z} ; X64-NEXT: vmovdqu {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll @@ -45,7 +45,7 @@ ; ; X64-LABEL: combine_vpermt2var_16i8_identity_mask: ; X64: # BB#0: -; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vmovdqu {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z} ; X64-NEXT: vmovdqu {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-masked.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-masked.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-masked.ll @@ -4,7 +4,7 @@ define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i32_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -18,7 +18,7 @@ define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i32_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3],xmm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -31,7 +31,7 @@ define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i32_2345: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -45,7 +45,7 @@ define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i32_2345: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd 
{{.*#+}} xmm0 {%k1} {z} = xmm0[2,3],xmm1[0,1] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -58,7 +58,7 @@ define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v2i64_12: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -72,7 +72,7 @@ define <2 x i64> @maskz_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v2i64_12: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -85,7 +85,7 @@ define <4 x i64> @mask_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i64_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -99,7 +99,7 @@ define <4 x i64> @maskz_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i64_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3],ymm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> @@ -112,7 +112,7 @@ define <4 x i64> @mask_shuffle_v4i64_1230(<4 x i64> %a, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i64_1230: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -126,7 +126,7 @@ define <4 x i64> @maskz_shuffle_v4i64_1230(<4 x i64> %a, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i64_1230: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,0] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> @@ -139,7 +139,7 @@ define <8 x i32> @mask_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v8i32_12345678: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -152,7 +152,7 @@ define <8 x i32> @maskz_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v8i32_12345678: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7],ymm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -164,7 +164,7 @@ define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v8i32_23456789: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -177,7 +177,7 @@ define <8 x i32> @maskz_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; CHECK-LABEL: 
maskz_shuffle_v8i32_23456789: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,4,5,6,7],ymm1[0,1] ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -189,7 +189,7 @@ define <8 x i32> @mask_shuffle_v8i32_12345670(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v8i32_12345670: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -202,7 +202,7 @@ define <8 x i32> @maskz_shuffle_v8i32_12345670(<8 x i32> %a, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v8i32_12345670: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7,0] ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> @@ -215,7 +215,7 @@ ; CHECK-LABEL: mask_shuffle_v8i32_23456701: ; CHECK: # BB#0: ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0] -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> @@ -228,7 +228,7 @@ ; CHECK-LABEL: maskz_shuffle_v8i32_23456701: ; CHECK: # BB#0: ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0] -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> @@ -240,7 +240,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_0(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti32x4 $0, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -255,7 +255,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_1(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -270,7 +270,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_2(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_2: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -285,7 +285,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_3(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_3: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti32x4 $3, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -300,7 +300,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_0(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextractf32x4 $0, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -315,7 +315,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_1(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: 
mask_extract_v16f32_v4f32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -330,7 +330,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_2(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_2: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -345,7 +345,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_3(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_3: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -360,7 +360,7 @@ define <8 x i32> @mask_extract_v16i32_v8i32_0(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v8i32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti32x8 $0, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -373,7 +373,7 @@ define <8 x i32> @mask_extract_v16i32_v8i32_1(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v8i32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -386,7 +386,7 @@ define <8 x float> @mask_extract_v16f32_v8f32_0(<16 x float> %a, <8 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v8f32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextractf32x8 $0, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -399,7 +399,7 @@ define <8 x float> @mask_extract_v16f32_v8f32_1(<16 x float> %a, <8 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v8f32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -412,7 +412,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_0(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti64x2 $0, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -427,7 +427,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_1(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -442,7 +442,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_2(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_2: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vextracti64x2 $2, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -457,7 +457,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_3(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_3: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb %edi, %k1 +; 
; CHECK-NEXT: vextracti64x2 $3, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -472,7 +472,7 @@
define <2 x double> @mask_extract_v8f64_v2f64_0(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f64_v2f64_0:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x2 $0, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -487,7 +487,7 @@
define <2 x double> @mask_extract_v8f64_v2f64_1(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f64_v2f64_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -502,7 +502,7 @@
define <2 x double> @mask_extract_v8f64_v2f64_2(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f64_v2f64_2:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x2 $2, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -517,7 +517,7 @@
define <2 x double> @mask_extract_v8f64_v2f64_3(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f64_v2f64_3:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x2 $3, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -532,7 +532,7 @@
define <4 x i64> @mask_extract_v8i64_v4i64_0(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8i64_v4i64_0:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti64x4 $0, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -546,7 +546,7 @@
define <4 x i64> @mask_extract_v8i64_v4i64_1(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8i64_v4i64_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -560,7 +560,7 @@
define <4 x double> @mask_extract_v8f64_v4f64_0(<8 x double> %a, <4 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f64_v4f64_0:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x4 $0, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -574,7 +574,7 @@
define <4 x double> @mask_extract_v8f64_v4f64_1(<8 x double> %a, <4 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f64_v4f64_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -588,7 +588,7 @@
define <8 x i32> @mask_extract_v8i64_v8i32_1(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8i64_v8i32_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -602,7 +602,7 @@
define <8 x float> @mask_extract_v8f64_v8f32_1(<8 x double> %a, <8 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_extract_v8f64_v8f32_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -616,7 +616,7 @@
define <4 x i32> @mask_cast_extract_v8i64_v4i32_1(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) {
; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -632,7 +632,7 @@
define <4 x float> @mask_cast_extract_v8f64_v4f32_1(<8 x double> %a, <4 x float> %passthru, i8 %mask) {
; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -648,7 +648,7 @@
define <4 x i64> @mask_cast_extract_v16i32_v4i64_1(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -663,7 +663,7 @@
define <4 x double> @mask_cast_extract_v16f32_v4f64_1(<16 x float> %a, <4 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@@ -678,7 +678,7 @@
define <2 x i64> @mask_cast_extract_v16i32_v2i64_1(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -694,7 +694,7 @@
define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
@@ -710,7 +710,7 @@
define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) {
; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
; CHECK-NEXT: retq
%q = load double, double* %x, align 1
@@ -725,7 +725,7 @@
define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) {
; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
; CHECK-NEXT: retq
%q = load double, double* %x, align 1
@@ -740,7 +740,7 @@
define <8 x float> @test_broadcast_2f64_8f32(<2 x double> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2f64_8f32:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <2 x double>, <2 x double> *%p
@@ -754,7 +754,7 @@
define <8 x i32> @test_broadcast_2i64_8i32(<2 x i64> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2i64_8i32:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <2 x i64>, <2 x i64> *%p
@@ -824,7 +824,7 @@
define <4 x double> @test_broadcast_4f32_4f64(<4 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f32_4f64:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x float>, <4 x float> *%p
@@ -839,7 +839,7 @@
define <4 x i64> @test_broadcast_4i32_4i64(<4 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i32_4i64:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x i32>, <4 x i32> *%p
@@ -854,7 +854,7 @@
define <8 x double> @test_broadcast_4f32_8f64(<4 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f32_8f64:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x float>, <4 x float> *%p
@@ -868,7 +868,7 @@
define <8 x i64> @test_broadcast_4i32_8i64(<4 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i32_8i64:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x i32>, <4 x i32> *%p
@@ -882,7 +882,7 @@
define <8 x double> @test_broadcast_8f32_8f64(<8 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_8f32_8f64:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <8 x float>, <8 x float> *%p
@@ -896,7 +896,7 @@
define <8 x i64> @test_broadcast_8i32_8i64(<8 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_8i32_8i64:
; CHECK: # BB#0:
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <8 x i32>, <8 x i32> *%p
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -35,11 +35,11 @@
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0
-; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: movb $1, %al
-; VL_BW_DQ-NEXT: kmovb %eax, %k0
+; VL_BW_DQ-NEXT: kmovd %eax, %k1
+; VL_BW_DQ-NEXT: vpmovm2q %k1, %xmm0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm1
-; VL_BW_DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; VL_BW_DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: retq
@@ -171,7 +171,7 @@
;
; VL_BW_DQ-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm0
; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %zmm0
@@ -195,18 +195,20 @@
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
-; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
+; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -224,16 +226,18 @@
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
-; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
+; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -253,18 +257,20 @@
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
-; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
+; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -284,18 +290,20 @@
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
; VL_BW_DQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
-; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
+; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -317,20 +325,22 @@
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: movb $51, %al
-; VL_BW_DQ-NEXT: kmovb %eax, %k1
+; VL_BW_DQ-NEXT: kmovd %eax, %k1
; VL_BW_DQ-NEXT: vpmovm2q %k1, %zmm0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm1
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
; VL_BW_DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
-; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
+; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -352,6 +362,7 @@
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -364,7 +375,8 @@
; VL_BW_DQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpmovq2m %zmm2, %k0
-; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
+; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
%c = shufflevector <8 x i1> , <8 x i1> %a, <8 x i32>
@@ -382,16 +394,18 @@
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kmovw %edi, %k0
+; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm0
; VL_BW_DQ-NEXT: vpbroadcastd %xmm0, %zmm0
; VL_BW_DQ-NEXT: vpmovd2m %zmm0, %k0
-; VL_BW_DQ-NEXT: kmovw %k0, %eax
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
+; VL_BW_DQ-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
%b = bitcast i16 %a to <16 x i1>