Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -6777,6 +6777,14 @@
         GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
         Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                    llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_cmp_ss :
+        GCCBuiltin<"__builtin_ia32_cmpss_mask">,
+        Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+                   llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_cmp_sd :
+        GCCBuiltin<"__builtin_ia32_cmpsd_mask">,
+        Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+                   llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_movntdqa :
         GCCBuiltin<"__builtin_ia32_movntdqa512">,
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -105,6 +105,7 @@
       /// Operands are two FP values to compare; result is a mask of
       /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
       FSETCC,
+      FSETCC_RND,
 
       /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
       /// result in an integer GPR. Needs masking for scalar result.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -15524,6 +15524,13 @@
     // The mask should be of type MVT::i1
     SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
 
+    switch (Op.getOpcode()) {
+    default: break;
+    case X86ISD::FSETCC:
+    case X86ISD::FSETCC_RND:
+      return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
+    }
+
     if (PreservedSrc.getOpcode() == ISD::UNDEF)
       PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
     return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
@@ -15858,6 +15865,29 @@
                               DAG.getIntPtrConstant(0, dl));
       return DAG.getBitcast(Op.getValueType(), Res);
     }
+    case CMP_MASK_SCALAR_CC: {
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
+      SDValue Mask = Op.getOperand(4);
+
+      SDValue Cmp;
+      if (IntrData->Opc1 != 0) {
+        SDValue Rnd = Op.getOperand(5);
+        if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+            X86::STATIC_ROUNDING::CUR_DIRECTION)
+          Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd);
+      }
+      // Default rounding mode.
+      if (!Cmp.getNode())
+        Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC);
+
+      SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
+                                             DAG.getTargetConstant(0, dl,
+                                                                   MVT::i1),
+                                             Subtarget, DAG);
+      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, CmpMask);
+    }
     case COMI: { // Comparison intrinsics
       ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
       SDValue LHS = Op.getOperand(1);
@@ -19264,6 +19294,7 @@
   case X86ISD::SETCC:              return "X86ISD::SETCC";
   case X86ISD::SETCC_CARRY:        return "X86ISD::SETCC_CARRY";
   case X86ISD::FSETCC:             return "X86ISD::FSETCC";
+  case X86ISD::FSETCC_RND:         return "X86ISD::FSETCC_RND";
   case X86ISD::FGETSIGNx86:        return "X86ISD::FGETSIGNx86";
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
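
Note (not part of the patch): the CMP_MASK_SCALAR_CC lowering above takes the
k-mask as operand 4 and the rounding mode as operand 5; a rounding operand of
8 ({sae}/FROUND_NO_EXC) selects the new FSETCC_RND node, while 4
(CUR_DIRECTION) falls through to the plain FSETCC path, after which
getScalarMaskingNode ANDs the i1 result with the mask and the result is
zero-extended to i8. A minimal IR-level sketch of a call this path consumes,
mirroring the tests further down; the function name is illustrative:

    declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)

    ; Comparison code 2 (LE) with the current rounding mode (operand 5 == 4);
    ; expected to select vcmpless with the k-mask applied.
    define i8 @cmp_ss_le(<4 x float> %a, <4 x float> %b, i8 %m) {
      %r = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %a, <4 x float> %b,
                                                i32 2, i8 %m, i32 4)
      ret i8 %r
    }
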
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -1265,41 +1265,85 @@
 //===----------------------------------------------------------------------===//
 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
-multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
-                             SDNode OpNode, ValueType VT,
-                             PatFrag ld_frag, string Suffix> {
-  def rr : AVX512Ii8<0xC2, MRMSrcReg,
-                (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
-                !strconcat("vcmp${cc}", Suffix,
+
+multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd> {
+
+  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+                      (outs _.KRC:$dst),
+                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+                      "vcmp${cc}"#_.Suffix,
+                      "$src2, $src1", "$src1, $src2",
+                      (OpNode (_.VT _.RC:$src1),
+                              (_.VT _.RC:$src2),
+                              imm:$cc)>, EVEX_4V;
+  let mayLoad = 1 in
+    defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+                        (outs _.KRC:$dst),
+                        (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+                        "vcmp${cc}"#_.Suffix,
+                        "$src2, $src1", "$src1, $src2",
+                        (OpNode (_.VT _.RC:$src1),
+                                (_.VT (scalar_to_vector
+                                       (_.ScalarLdFrag addr:$src2))),
+                                imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+                       (outs _.KRC:$dst),
+                       (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+                       "vcmp${cc}"#_.Suffix,
+                       "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+                       (OpNodeRnd (_.VT _.RC:$src1),
+                                  (_.VT _.RC:$src2),
+                                  imm:$cc,
+                                  (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
+  // Accept explicit immediate argument form instead of comparison code.
+  let isAsmParserOnly = 1, hasSideEffects = 0 in {
+    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+                         (outs VK1:$dst),
+                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                         "vcmp"#_.Suffix,
+                         "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
+    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+                         (outs _.KRC:$dst),
+                         (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+                         "vcmp"#_.Suffix,
+                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
+                         EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+                         (outs _.KRC:$dst),
+                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                         "vcmp"#_.Suffix,
+                         "$cc,{sae}, $src2, $src1", "$src1, $src2,{sae}, $cc">,
+                         EVEX_4V, EVEX_B;
+  } // let isAsmParserOnly = 1, hasSideEffects = 0
+
+  let isCodeGenOnly = 1 in {
+    def rr : AVX512Ii8<0xC2, MRMSrcReg,
+                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
+                !strconcat("vcmp${cc}", _.Suffix,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+                                          _.FRC:$src2,
+                                          imm:$cc))],
                 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
-  def rm : AVX512Ii8<0xC2, MRMSrcMem,
-                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
-                !strconcat("vcmp${cc}", Suffix,
-                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set VK1:$dst, (OpNode (VT RC:$src1),
-                                        (ld_frag addr:$src2), imm:$cc))],
-                IIC_SSE_ALU_F32P_RM>, EVEX_4V;
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
-                (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
-                !strconcat("vcmp", Suffix,
-                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
-                [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
     let mayLoad = 1 in
-    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
-                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
-                !strconcat("vcmp", Suffix,
-                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
-                [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+      def rm : AVX512Ii8<0xC2, MRMSrcMem,
+                (outs _.KRC:$dst),
+                (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+                !strconcat("vcmp${cc}", _.Suffix,
+                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+                                          (_.ScalarLdFrag addr:$src2),
+                                          imm:$cc))],
+                IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
   }
 }
 
 let Predicates = [HasAVX512] in {
-defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, X86cmpms, f32, loadf32, "ss">,
-               XS;
-defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, X86cmpms, f64, loadf64, "sd">,
-               XD, VEX_W;
+  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
+                   AVX512XSIi8Base;
+  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
+                   AVX512XDIi8Base, VEX_W;
 }
 
 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
Index: lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- lib/Target/X86/X86InstrFragmentsSIMD.td
+++ lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -160,10 +160,15 @@
 def X86CmpMaskCCScalar :
       SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
 
-def X86cmpm    : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
-def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
-def X86cmpmu   : SDNode<"X86ISD::CMPMU",    X86CmpMaskCC>;
-def X86cmpms   : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalar>;
+def X86CmpMaskCCScalarRound :
+      SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>,
+                           SDTCisInt<4>]>;
+
+def X86cmpm     : SDNode<"X86ISD::CMPM",       X86CmpMaskCC>;
+def X86cmpmRnd  : SDNode<"X86ISD::CMPM_RND",   X86CmpMaskCCRound>;
+def X86cmpmu    : SDNode<"X86ISD::CMPMU",      X86CmpMaskCC>;
+def X86cmpms    : SDNode<"X86ISD::FSETCC",     X86CmpMaskCCScalar>;
+def X86cmpmsRnd : SDNode<"X86ISD::FSETCC_RND", X86CmpMaskCCScalarRound>;
 
 def X86vshl    : SDNode<"X86ISD::VSHL",
                  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -20,7 +20,7 @@
   INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC,
   XTEST, ADX,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
-  CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
+  CMP_MASK, CMP_MASK_CC, CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
   INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
@@ -428,6 +428,10 @@
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_sd,    CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+                     X86ISD::FSETCC_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_ss,    CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+                     X86ISD::FSETCC_RND),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll
+++ test/CodeGen/X86/avx512-intrinsics.ll
@@ -3958,6 +3958,74 @@
   ret <2 x double> %res
 }
 
+declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcmplesd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %ecx
+; CHECK-NEXT:    andl $1, %ecx
+; CHECK-NEXT:    vcmpunordsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %edx
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vcmpneqsd %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovw %k0, %esi
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    addb %cl, %dl
+; CHECK-NEXT:    addb %sil, %al
+; CHECK-NEXT:    addb %dl, %al
+; CHECK-NEXT:    retq
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
+
+  %res11 = add i8 %res1, %res2
+  %res12 = add i8 %res3, %res4
+  %res13 = add i8 %res11, %res12
+  ret i8 %res13
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcmpless %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %ecx
+; CHECK-NEXT:    andl $1, %ecx
+; CHECK-NEXT:    vcmpunordss {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, %edx
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vcmpneqss %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovw %k0, %esi
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    vcmpnltss {sae}, %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    addb %cl, %dl
+; CHECK-NEXT:    addb %sil, %al
+; CHECK-NEXT:    addb %dl, %al
+; CHECK-NEXT:    retq
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
+
+  %res11 = add i8 %res1, %res2
+  %res12 = add i8 %res3, %res4
+  %res13 = add i8 %res11, %res12
+  ret i8 %res13
+}
+
 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
 
 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
Index: test/MC/X86/avx512-encodings.s
===================================================================
--- test/MC/X86/avx512-encodings.s
+++ test/MC/X86/avx512-encodings.s
@@ -14958,6 +14958,94 @@
 // CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
           vgetexpsd -1032(%rdx), %xmm7, %xmm2
 
+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0xab]
+          vcmpss $0xab, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4 {%k5}
+// CHECK: encoding: [0x62,0xd1,0x06,0x0d,0xc2,0xe4,0xab]
+          vcmpss $0xab, %xmm12, %xmm15, %k4 {%k5}
+
+// CHECK: vcmpss $171,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0xab]
+          vcmpss $0xab,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0x7b]
+          vcmpss $0x7b, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0x7b]
+          vcmpss $0x7b,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, (%rcx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x21,0x7b]
+          vcmpss $0x7b, (%rcx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 291(%rax,%r14,8), %xmm15, %k4
+// CHECK: encoding: [0x62,0xb1,0x06,0x08,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vcmpss $0x7b, 291(%rax,%r14,8), %xmm15, %k4
+
+// CHECK: vcmpss $123, 508(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x7f,0x7b]
+          vcmpss $0x7b, 508(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+          vcmpss $0x7b, 512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x80,0x7b]
+          vcmpss $0x7b, -512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -516(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+          vcmpss $0x7b, -516(%rdx), %xmm15, %k4
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0xab]
+          vcmpsd $0xab, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5 {%k1}
+// CHECK: encoding: [0x62,0xf1,0xe7,0x01,0xc2,0xec,0xab]
+          vcmpsd $0xab, %xmm4, %xmm19, %k5 {%k1}
+
+// CHECK: vcmpsd $171,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0xab]
+          vcmpsd $0xab,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0x7b]
+          vcmpsd $0x7b, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0x7b]
+          vcmpsd $0x7b,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, (%rcx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x29,0x7b]
+          vcmpsd $0x7b, (%rcx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 291(%rax,%r14,8), %xmm19, %k5
+// CHECK: encoding: [0x62,0xb1,0xe7,0x00,0xc2,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vcmpsd $0x7b, 291(%rax,%r14,8), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1016(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x7f,0x7b]
+          vcmpsd $0x7b, 1016(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0x00,0x04,0x00,0x00,0x7b]
+          vcmpsd $0x7b, 1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x80,0x7b]
+          vcmpsd $0x7b, -1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1032(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+          vcmpsd $0x7b, -1032(%rdx), %xmm19, %k5
+
 // CHECK: vgetmantss $171, %xmm12, %xmm2, %xmm3
 // CHECK: encoding: [0x62,0xd3,0x6d,0x08,0x27,0xdc,0xab]
           vgetmantss $0xab, %xmm12, %xmm2, %xmm3
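
Note (not part of the patch): since the lowering zero-extends the masked i1
comparison result, the returned i8 should carry the outcome in bit 0 and
zeros elsewhere, so callers can recover a boolean with a plain trunc. A
minimal sketch under that assumption; the function name is illustrative:

    declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)

    ; Comparison code 3 (UNORD) with {sae} (operand 5 == 8); expected to
    ; select vcmpunordsd {sae}. Bit 0 of %r holds the comparison result.
    define i1 @cmp_sd_unord(<2 x double> %a, <2 x double> %b) {
      %r = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %a, <2 x double> %b,
                                                i32 3, i8 -1, i32 8)
      %bit = trunc i8 %r to i1
      ret i1 %bit
    }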