Skip to content

Commit eaf2da1

Browse files
author
Asaf Badouh
committed Sep 21, 2015
[X86][AVX512] add masked version for RSQRT14 & RCP14 Scalar FP
Differential Revision: http://reviews.llvm.org/D12524
llvm-svn: 248147
1 parent 6b14435 commit eaf2da1

File tree

5 files changed

+182
-46
lines changed

5 files changed

+182
-46
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+8-1
Original file line number | Diff line number | Diff line change
@@ -15890,6 +15890,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
1589015890
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
1589115891
Mask, PassThru, Subtarget, DAG);
1589215892
}
15893+
case INTR_TYPE_SCALAR_MASK: {
15894+
SDValue Src1 = Op.getOperand(1);
15895+
SDValue Src2 = Op.getOperand(2);
15896+
SDValue passThru = Op.getOperand(3);
15897+
SDValue Mask = Op.getOperand(4);
15898+
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
15899+
Mask, passThru, Subtarget, DAG);
15900+
}
1589315901
case INTR_TYPE_SCALAR_MASK_RM: {
1589415902
SDValue Src1 = Op.getOperand(1);
1589515903
SDValue Src2 = Op.getOperand(2);
@@ -16059,7 +16067,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
1605916067
}
1606016068
case FPCLASS: {
1606116069
// FPclass intrinsics with mask
16062-
//
1606316070
SDValue Src1 = Op.getOperand(1);
1606416071
EVT VT = Src1.getValueType();
1606516072
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,

‎llvm/lib/Target/X86/X86InstrAVX512.td

+24-44
Original file line number | Diff line number | Diff line change
@@ -5360,50 +5360,31 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
53605360
}
53615361

53625362
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
5363-
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
5364-
X86MemOperand x86memop> {
5365-
let hasSideEffects = 0 in {
5366-
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
5367-
(ins RC:$src1, RC:$src2),
5368-
!strconcat(OpcodeStr,
5369-
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
5363+
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
5364+
X86VectorVTInfo _> {
5365+
let hasSideEffects = 0, AddedComplexity = 20 , Predicates = [HasAVX512] in {
5366+
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5367+
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5368+
"$src2, $src1", "$src1, $src2",
5369+
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
53705370
let mayLoad = 1 in {
5371-
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
5372-
(ins RC:$src1, x86memop:$src2),
5373-
!strconcat(OpcodeStr,
5374-
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
5371+
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5372+
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5373+
"$src2, $src1", "$src1, $src2",
5374+
(OpNode (_.VT _.RC:$src1),
5375+
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
53755376
}
53765377
}
53775378
}
53785379

5379-
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
5380-
EVEX_CD8<32, CD8VT1>;
5381-
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
5382-
VEX_W, EVEX_CD8<64, CD8VT1>;
5383-
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
5384-
EVEX_CD8<32, CD8VT1>;
5385-
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
5386-
VEX_W, EVEX_CD8<64, CD8VT1>;
5387-
5388-
def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
5389-
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
5390-
(COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
5391-
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
5392-
5393-
def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
5394-
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
5395-
(COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
5396-
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
5397-
5398-
def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
5399-
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
5400-
(COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
5401-
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
5402-
5403-
def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
5404-
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
5405-
(COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
5406-
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
5380+
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
5381+
EVEX_CD8<32, CD8VT1>, T8PD;
5382+
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
5383+
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
5384+
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
5385+
EVEX_CD8<32, CD8VT1>, T8PD;
5386+
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
5387+
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
54075388

54085389
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
54095390
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5685,15 +5666,14 @@ defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
56855666

56865667
let Predicates = [HasAVX512] in {
56875668
def : Pat<(f32 (X86frsqrt FR32X:$src)),
5688-
(VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
5669+
(COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
56895670
def : Pat<(f32 (X86frsqrt (load addr:$src))),
5690-
(VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
5671+
(COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
56915672
Requires<[OptForSize]>;
5692-
56935673
def : Pat<(f32 (X86frcp FR32X:$src)),
5694-
(VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
5674+
(COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
56955675
def : Pat<(f32 (X86frcp (load addr:$src))),
5696-
(VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
5676+
(COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
56975677
Requires<[OptForSize]>;
56985678
}
56995679

‎llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

+2
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp,
5858
[SDNPCommutative, SDNPAssociative]>;
5959
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
6060
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
61+
def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>;
62+
def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>;
6163
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
6264
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
6365
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;

‎llvm/lib/Target/X86/X86IntrinsicsInfo.h

+5-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ enum IntrinsicType {
2525
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
2626
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
2727
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
28-
VPERM_3OP_MASKZ,
28+
VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
2929
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
3030
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
3131
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -1517,10 +1517,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
15171517
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
15181518
X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
15191519
X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
1520+
X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
1521+
X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
15201522
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
15211523
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
15221524
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
15231525
X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
1526+
X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
1527+
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
15241528
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
15251529
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
15261530
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),

‎llvm/test/MC/X86/avx512-encodings.s

+143
Original file line number | Diff line number | Diff line change
@@ -17237,3 +17237,146 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
1723717237
// CHECK: vcvttss2usi -516(%rdx), %r8
1723817238
// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x78,0x82,0xfc,0xfd,0xff,0xff]
1723917239
vcvttss2usi -516(%rdx), %r8
17240+
// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26
17241+
// CHECK: encoding: [0x62,0x42,0xcd,0x08,0x4f,0xd2]
17242+
vrsqrt14sd %xmm10, %xmm6, %xmm26
17243+
17244+
// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5}
17245+
// CHECK: encoding: [0x62,0x42,0xcd,0x0d,0x4f,0xd2]
17246+
vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5}
17247+
17248+
// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5} {z}
17249+
// CHECK: encoding: [0x62,0x42,0xcd,0x8d,0x4f,0xd2]
17250+
vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5} {z}
17251+
17252+
// CHECK: vrsqrt14sd (%rcx), %xmm6, %xmm26
17253+
// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x11]
17254+
vrsqrt14sd (%rcx), %xmm6, %xmm26
17255+
17256+
// CHECK: vrsqrt14sd 291(%rax,%r14,8), %xmm6, %xmm26
17257+
// CHECK: encoding: [0x62,0x22,0xcd,0x08,0x4f,0x94,0xf0,0x23,0x01,0x00,0x00]
17258+
vrsqrt14sd 291(%rax,%r14,8), %xmm6, %xmm26
17259+
17260+
// CHECK: vrsqrt14sd 1016(%rdx), %xmm6, %xmm26
17261+
// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x52,0x7f]
17262+
vrsqrt14sd 1016(%rdx), %xmm6, %xmm26
17263+
17264+
// CHECK: vrsqrt14sd 1024(%rdx), %xmm6, %xmm26
17265+
// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x92,0x00,0x04,0x00,0x00]
17266+
vrsqrt14sd 1024(%rdx), %xmm6, %xmm26
17267+
17268+
// CHECK: vrsqrt14sd -1024(%rdx), %xmm6, %xmm26
17269+
// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x52,0x80]
17270+
vrsqrt14sd -1024(%rdx), %xmm6, %xmm26
17271+
17272+
// CHECK: vrsqrt14sd -1032(%rdx), %xmm6, %xmm26
17273+
// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x92,0xf8,0xfb,0xff,0xff]
17274+
vrsqrt14sd -1032(%rdx), %xmm6, %xmm26
17275+
17276+
// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14
17277+
// CHECK: encoding: [0x62,0x52,0x0d,0x08,0x4f,0xf1]
17278+
vrsqrt14ss %xmm9, %xmm14, %xmm14
17279+
17280+
// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1}
17281+
// CHECK: encoding: [0x62,0x52,0x0d,0x09,0x4f,0xf1]
17282+
vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1}
17283+
17284+
// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1} {z}
17285+
// CHECK: encoding: [0x62,0x52,0x0d,0x89,0x4f,0xf1]
17286+
vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1} {z}
17287+
17288+
// CHECK: vrsqrt14ss (%rcx), %xmm14, %xmm14
17289+
// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x31]
17290+
vrsqrt14ss (%rcx), %xmm14, %xmm14
17291+
17292+
// CHECK: vrsqrt14ss 291(%rax,%r14,8), %xmm14, %xmm14
17293+
// CHECK: encoding: [0x62,0x32,0x0d,0x08,0x4f,0xb4,0xf0,0x23,0x01,0x00,0x00]
17294+
vrsqrt14ss 291(%rax,%r14,8), %xmm14, %xmm14
17295+
17296+
// CHECK: vrsqrt14ss 508(%rdx), %xmm14, %xmm14
17297+
// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x72,0x7f]
17298+
vrsqrt14ss 508(%rdx), %xmm14, %xmm14
17299+
17300+
// CHECK: vrsqrt14ss 512(%rdx), %xmm14, %xmm14
17301+
// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0xb2,0x00,0x02,0x00,0x00]
17302+
vrsqrt14ss 512(%rdx), %xmm14, %xmm14
17303+
17304+
// CHECK: vrsqrt14ss -512(%rdx), %xmm14, %xmm14
17305+
// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x72,0x80]
17306+
vrsqrt14ss -512(%rdx), %xmm14, %xmm14
17307+
17308+
// CHECK: vrsqrt14ss -516(%rdx), %xmm14, %xmm14
17309+
// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0xb2,0xfc,0xfd,0xff,0xff]
17310+
vrsqrt14ss -516(%rdx), %xmm14, %xmm14
17311+
17312+
// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12
17313+
// CHECK: encoding: [0x62,0x52,0xcd,0x00,0x4d,0xe6]
17314+
vrcp14sd %xmm14, %xmm22, %xmm12
17315+
17316+
// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12 {%k2}
17317+
// CHECK: encoding: [0x62,0x52,0xcd,0x02,0x4d,0xe6]
17318+
vrcp14sd %xmm14, %xmm22, %xmm12 {%k2}
17319+
17320+
// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12 {%k2} {z}
17321+
// CHECK: encoding: [0x62,0x52,0xcd,0x82,0x4d,0xe6]
17322+
vrcp14sd %xmm14, %xmm22, %xmm12 {%k2} {z}
17323+
17324+
// CHECK: vrcp14sd (%rcx), %xmm22, %xmm12
17325+
// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x21]
17326+
vrcp14sd (%rcx), %xmm22, %xmm12
17327+
17328+
// CHECK: vrcp14sd 291(%rax,%r14,8), %xmm22, %xmm12
17329+
// CHECK: encoding: [0x62,0x32,0xcd,0x00,0x4d,0xa4,0xf0,0x23,0x01,0x00,0x00]
17330+
vrcp14sd 291(%rax,%r14,8), %xmm22, %xmm12
17331+
17332+
// CHECK: vrcp14sd 1016(%rdx), %xmm22, %xmm12
17333+
// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x62,0x7f]
17334+
vrcp14sd 1016(%rdx), %xmm22, %xmm12
17335+
17336+
// CHECK: vrcp14sd 1024(%rdx), %xmm22, %xmm12
17337+
// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0xa2,0x00,0x04,0x00,0x00]
17338+
vrcp14sd 1024(%rdx), %xmm22, %xmm12
17339+
17340+
// CHECK: vrcp14sd -1024(%rdx), %xmm22, %xmm12
17341+
// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x62,0x80]
17342+
vrcp14sd -1024(%rdx), %xmm22, %xmm12
17343+
17344+
// CHECK: vrcp14sd -1032(%rdx), %xmm22, %xmm12
17345+
// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0xa2,0xf8,0xfb,0xff,0xff]
17346+
vrcp14sd -1032(%rdx), %xmm22, %xmm12
17347+
17348+
// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8
17349+
// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0xc3]
17350+
vrcp14ss %xmm3, %xmm8, %xmm8
17351+
17352+
// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8 {%k7}
17353+
// CHECK: encoding: [0x62,0x72,0x3d,0x0f,0x4d,0xc3]
17354+
vrcp14ss %xmm3, %xmm8, %xmm8 {%k7}
17355+
17356+
// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8 {%k7} {z}
17357+
// CHECK: encoding: [0x62,0x72,0x3d,0x8f,0x4d,0xc3]
17358+
vrcp14ss %xmm3, %xmm8, %xmm8 {%k7} {z}
17359+
17360+
// CHECK: vrcp14ss (%rcx), %xmm8, %xmm8
17361+
// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x01]
17362+
vrcp14ss (%rcx), %xmm8, %xmm8
17363+
17364+
// CHECK: vrcp14ss 291(%rax,%r14,8), %xmm8, %xmm8
17365+
// CHECK: encoding: [0x62,0x32,0x3d,0x08,0x4d,0x84,0xf0,0x23,0x01,0x00,0x00]
17366+
vrcp14ss 291(%rax,%r14,8), %xmm8, %xmm8
17367+
17368+
// CHECK: vrcp14ss 508(%rdx), %xmm8, %xmm8
17369+
// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x42,0x7f]
17370+
vrcp14ss 508(%rdx), %xmm8, %xmm8
17371+
17372+
// CHECK: vrcp14ss 512(%rdx), %xmm8, %xmm8
17373+
// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0x00,0x02,0x00,0x00]
17374+
vrcp14ss 512(%rdx), %xmm8, %xmm8
17375+
17376+
// CHECK: vrcp14ss -512(%rdx), %xmm8, %xmm8
17377+
// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x42,0x80]
17378+
vrcp14ss -512(%rdx), %xmm8, %xmm8
17379+
17380+
// CHECK: vrcp14ss -516(%rdx), %xmm8, %xmm8
17381+
// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0xfc,0xfd,0xff,0xff]
17382+
vrcp14ss -516(%rdx), %xmm8, %xmm8

0 commit comments

Comments
 (0)