Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -6953,6 +6953,14 @@
           GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
           Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty,
                      llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_cmp_ss :
+        GCCBuiltin<"__builtin_ia32_cmpss_mask">,
+        Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+                   llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_cmp_sd :
+        GCCBuiltin<"__builtin_ia32_cmpsd_mask">,
+        Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+                   llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
   def int_x86_avx512_movntdqa :
         GCCBuiltin<"__builtin_ia32_movntdqa512">,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -15753,17 +15753,20 @@
                                     SDValue PreservedSrc,
                                     const X86Subtarget *Subtarget,
                                     SelectionDAG &DAG) {
-    if (isAllOnes(Mask))
-      return Op;
+  if (isAllOnes(Mask))
+    return Op;
 
-    EVT VT = Op.getValueType();
-    SDLoc dl(Op);
-    // The mask should be of type MVT::i1
-    SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
+  EVT VT = Op.getValueType();
+  SDLoc dl(Op);
+  // The mask should be of type MVT::i1
+  SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
 
-    if (PreservedSrc.getOpcode() == ISD::UNDEF)
-      PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
-    return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
+  if (Op.getOpcode() == X86ISD::FSETCC)
+    return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
+
+  if (PreservedSrc.getOpcode() == ISD::UNDEF)
+    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+  return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
 }
 
 static int getSEHRegistrationNodeSize(const Function *Fn) {
@@ -16124,6 +16127,32 @@
                               DAG.getIntPtrConstant(0, dl));
     return DAG.getBitcast(Op.getValueType(), Res);
   }
+  case CMP_MASK_SCALAR_CC: {
+    SDValue Src1 = Op.getOperand(1);
+    SDValue Src2 = Op.getOperand(2);
+    SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
+    SDValue Mask = Op.getOperand(4);
+
+    SDValue Cmp;
+    if (IntrData->Opc1 != 0) {
+      SDValue Rnd = Op.getOperand(5);
+      if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+          X86::STATIC_ROUNDING::CUR_DIRECTION)
+        Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd);
+    }
+    // Default rounding mode.
+    if (!Cmp.getNode())
+      Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC);
+
+    SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
+                                           DAG.getTargetConstant(0, dl,
+                                                                 MVT::i1),
+                                           Subtarget, DAG);
+
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i8,
+                       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, CmpMask),
+                       DAG.getValueType(MVT::i1));
+  }
   case COMI: { // Comparison intrinsics
     ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
     SDValue LHS = Op.getOperand(1);
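Note: each new intrinsic takes five operands: the two vector sources, an i32 comparison
predicate, an i8 mask of which only bit 0 guards the compare, and an i32 rounding control,
where 4 selects X86::STATIC_ROUNDING::CUR_DIRECTION and 8 selects FROUND_NO_EXC ({sae}).
A minimal IR sketch of a call site (the function name is hypothetical; the constants
follow the tests further below):

    declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)

    ; Masked LE (cc = 2) compare of the low floats, default rounding (4).
    ; The lowering sign-extends the i1 result, so the returned i8 is 0 or -1.
    define i8 @cmp_ss_example(<4 x float> %a, <4 x float> %b, i8 %mask) {
      %r = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %a, <4 x float> %b,
                                                i32 2, i8 %mask, i32 4)
      ret i8 %r
    }
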
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -1361,41 +1361,85 @@
 //===----------------------------------------------------------------------===//
 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
-multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
-                             SDNode OpNode, ValueType VT,
-                             PatFrag ld_frag, string Suffix> {
-  def rr : AVX512Ii8<0xC2, MRMSrcReg,
-                (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
-                !strconcat("vcmp${cc}", Suffix,
+
+multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{
+
+  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+                      (outs _.KRC:$dst),
+                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+                      "vcmp${cc}"#_.Suffix,
+                      "$src2, $src1", "$src1, $src2",
+                      (OpNode (_.VT _.RC:$src1),
+                              (_.VT _.RC:$src2),
+                              imm:$cc)>, EVEX_4V;
+  let mayLoad = 1 in
+    defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+                      (outs _.KRC:$dst),
+                      (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+                      "vcmp${cc}"#_.Suffix,
+                      "$src2, $src1", "$src1, $src2",
+                      (OpNode (_.VT _.RC:$src1),
+                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+                          imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+                     (outs _.KRC:$dst),
+                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+                     "vcmp${cc}"#_.Suffix,
+                     "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+                     (OpNodeRnd (_.VT _.RC:$src1),
+                                (_.VT _.RC:$src2),
+                                imm:$cc,
+                                (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
+  // Accept explicit immediate argument form instead of comparison code.
+  let isAsmParserOnly = 1, hasSideEffects = 0 in {
+    defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+                       (outs VK1:$dst),
+                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                       "vcmp"#_.Suffix,
+                       "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
+    defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+                       (outs _.KRC:$dst),
+                       (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+                       "vcmp"#_.Suffix,
+                       "$cc, $src2, $src1", "$src1, $src2, $cc">,
+                       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+    defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+                       (outs _.KRC:$dst),
+                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                       "vcmp"#_.Suffix,
+                       "$cc,{sae}, $src2, $src1", "$src1, $src2,{sae}, $cc">,
+                       EVEX_4V, EVEX_B;
+  } // let isAsmParserOnly = 1, hasSideEffects = 0
+
+  let isCodeGenOnly = 1 in {
+    def rr : AVX512Ii8<0xC2, MRMSrcReg,
+                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
+                !strconcat("vcmp${cc}", _.Suffix,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+                                          _.FRC:$src2,
+                                          imm:$cc))],
                 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
-  def rm : AVX512Ii8<0xC2, MRMSrcMem,
-                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
-                !strconcat("vcmp${cc}", Suffix,
-                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set VK1:$dst, (OpNode (VT RC:$src1),
-                                (ld_frag addr:$src2), imm:$cc))],
-                IIC_SSE_ALU_F32P_RM>, EVEX_4V;
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
-               (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
-               !strconcat("vcmp", Suffix,
-                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
-               [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
     let mayLoad = 1 in
-    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
-               (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
-               !strconcat("vcmp", Suffix,
-                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
-               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+      def rm : AVX512Ii8<0xC2, MRMSrcMem,
+                (outs _.KRC:$dst),
+                (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+                !strconcat("vcmp${cc}", _.Suffix,
+                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+                                          (_.ScalarLdFrag addr:$src2),
+                                          imm:$cc))],
+                IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
   }
 }
 
 let Predicates = [HasAVX512] in {
-defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, X86cmpms, f32, loadf32, "ss">,
-               XS;
-defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, X86cmpms, f64, loadf64, "sd">,
-               XD, VEX_W;
+  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
+                   AVX512XSIi8Base;
+  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
+                   AVX512XDIi8Base, VEX_W;
 }
 
 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -2021,10 +2065,15 @@
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
   def : Pat<(i32 (anyext VK1:$src)),
             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
+
   def : Pat<(i8 (zext VK1:$src)),
             (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;
+  def : Pat<(i8 (anyext VK1:$src)),
+            (EXTRACT_SUBREG
+             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
+
   def : Pat<(i64 (zext VK1:$src)),
             (AND64ri8 (SUBREG_TO_REG (i64 0),
              (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
Index: lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- lib/Target/X86/X86InstrFragmentsSIMD.td
+++ lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -160,10 +160,15 @@
 def X86CmpMaskCCScalar :
       SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
 
-def X86cmpm    : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
-def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
-def X86cmpmu   : SDNode<"X86ISD::CMPMU",    X86CmpMaskCC>;
-def X86cmpms   : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalar>;
+def X86CmpMaskCCScalarRound :
+      SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>,
+                           SDTCisInt<4>]>;
+
+def X86cmpm     : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
+def X86cmpmRnd  : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmu    : SDNode<"X86ISD::CMPMU",    X86CmpMaskCC>;
+def X86cmpms    : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalar>;
+def X86cmpmsRnd : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalarRound>;
 
 def X86vshl    : SDNode<"X86ISD::VSHL",
                  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -20,7 +20,7 @@
   INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST,
   ADX, FPCLASS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
-  CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
+  CMP_MASK, CMP_MASK_CC, CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
   INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM,
   INTR_TYPE_3OP_IMM8_MASK,
@@ -428,6 +428,10 @@
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+                     X86ISD::FSETCC),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+                     X86ISD::FSETCC),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
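For reference, the Opc0/Opc1 pair above maps both the default-rounding and the
explicit-rounding paths to X86ISD::FSETCC; the lowering only consults Opc1 when the
rounding operand is not CUR_DIRECTION. Also note that getScalarMaskingNode drops the
mask entirely when it is all-ones, so a call like the hedged sketch below (hypothetical
function name) should select the unmasked vcmpunordsd {sae} form with no {%k} write-mask,
matching the *_all tests that follow:

    declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)

    ; Unmasked (i8 -1) UNORD (cc = 3) compare with {sae} (rounding = 8).
    define i8 @cmp_sd_unmasked(<2 x double> %a, <2 x double> %b) {
      %r = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %a, <2 x double> %b,
                                                i32 3, i8 -1, i32 8)
      ret i8 %r
    }
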
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll
+++ test/CodeGen/X86/avx512-intrinsics.ll
@@ -3997,6 +3997,96 @@
   ret <2 x double> %res
 }
 
+declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    shlb $7, %al
+; CHECK-NEXT:    sarb $7, %al
+; CHECK-NEXT:    retq
+
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
+  ret i8 %res4
+}
+
+define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcmpunordsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    vcmplesd %xmm1, %xmm0, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k0
+; CHECK-NEXT:    vcmpnltsd {sae}, %xmm1, %xmm0, %k1
+; CHECK-NEXT:    vcmpneqsd %xmm1, %xmm0, %k2
+; CHECK-NEXT:    korw %k1, %k2, %k1
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    kmovw %edi, %k2
+; CHECK-NEXT:    kandw %k2, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    shlb $7, %al
+; CHECK-NEXT:    sarb $7, %al
+; CHECK-NEXT:    retq
+
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
+
+  %res11 = or i8 %res1, %res2
+  %res12 = or i8 %res3, %res4
+  %res13 = or i8 %res11, %res12
+  ret i8 %res13
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vcmpunordss %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    shlb $7, %al
+; CHECK-NEXT:    sarb $7, %al
+; CHECK-NEXT:    retq
+
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
+  ret i8 %res2
+}
+
+define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcmpless %xmm1, %xmm0, %k1
+; CHECK-NEXT:    vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    vcmpneqss %xmm1, %xmm0, %k1
+; CHECK-NEXT:    vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    kmovw %edi, %k2
+; CHECK-NEXT:    kandw %k2, %k1, %k1
+; CHECK-NEXT:    kandw %k1, %k0, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    shlb $7, %al
+; CHECK-NEXT:    sarb $7, %al
+; CHECK-NEXT:    retq
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
+
+  %res11 = and i8 %res1, %res2
+  %res12 = and i8 %res3, %res4
+  %res13 = and i8 %res11, %res12
+  ret i8 %res13
+}
+
 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
 
 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
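The shlb $7 / sarb $7 pairs in the CHECK lines above come from the SIGN_EXTEND_INREG node
the lowering emits: the compare result lives in bit 0 of the returned i8 and is replicated
across the byte. In IR terms, the returned value behaves like this sketch (a hypothetical
function, shown only to illustrate the idiom):

    ; Sign-extend bit 0 of %lowbit across the byte: yields 0 or -1.
    define i8 @signext_i1_in_i8(i8 %lowbit) {
      %s = shl i8 %lowbit, 7
      %r = ashr i8 %s, 7
      ret i8 %r
    }
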
Index: test/MC/X86/avx512-encodings.s
===================================================================
--- test/MC/X86/avx512-encodings.s
+++ test/MC/X86/avx512-encodings.s
@@ -14958,6 +14958,94 @@
 // CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
           vgetexpsd -1032(%rdx), %xmm7, %xmm2
 
+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0xab]
+          vcmpss $0xab, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4 {%k5}
+// CHECK: encoding: [0x62,0xd1,0x06,0x0d,0xc2,0xe4,0xab]
+          vcmpss $0xab, %xmm12, %xmm15, %k4 {%k5}
+
+// CHECK: vcmpss $171,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0xab]
+          vcmpss $0xab,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0x7b]
+          vcmpss $0x7b, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0x7b]
+          vcmpss $0x7b,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, (%rcx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x21,0x7b]
+          vcmpss $0x7b, (%rcx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 291(%rax,%r14,8), %xmm15, %k4
+// CHECK: encoding: [0x62,0xb1,0x06,0x08,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vcmpss $0x7b, 291(%rax,%r14,8), %xmm15, %k4
+
+// CHECK: vcmpss $123, 508(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x7f,0x7b]
+          vcmpss $0x7b, 508(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+          vcmpss $0x7b, 512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x80,0x7b]
+          vcmpss $0x7b, -512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -516(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+          vcmpss $0x7b, -516(%rdx), %xmm15, %k4
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0xab]
+          vcmpsd $0xab, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5 {%k1}
+// CHECK: encoding: [0x62,0xf1,0xe7,0x01,0xc2,0xec,0xab]
+          vcmpsd $0xab, %xmm4, %xmm19, %k5 {%k1}
+
+// CHECK: vcmpsd $171,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0xab]
+          vcmpsd $0xab,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0x7b]
+          vcmpsd $0x7b, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0x7b]
+          vcmpsd $0x7b,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, (%rcx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x29,0x7b]
+          vcmpsd $0x7b, (%rcx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 291(%rax,%r14,8), %xmm19, %k5
+// CHECK: encoding: [0x62,0xb1,0xe7,0x00,0xc2,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vcmpsd $0x7b, 291(%rax,%r14,8), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1016(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x7f,0x7b]
+          vcmpsd $0x7b, 1016(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0x00,0x04,0x00,0x00,0x7b]
+          vcmpsd $0x7b, 1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x80,0x7b]
+          vcmpsd $0x7b, -1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1032(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+          vcmpsd $0x7b, -1032(%rdx), %xmm19, %k5
+
 // CHECK: vsqrtss %xmm8, %xmm19, %xmm22
 // CHECK: encoding: [0x62,0xc1,0x66,0x00,0x51,0xf0]
           vsqrtss %xmm8, %xmm19, %xmm22