diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -537,15 +537,16 @@ // type. static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { unsigned Opcode = N->getOpcode(); - if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC || - Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) { + if (Opcode == X86ISD::CMPM || Opcode == X86ISD::STRICT_CMPM || + Opcode == ISD::SETCC || Opcode == X86ISD::CMPM_SAE || + Opcode == X86ISD::VFPCLASS) { // We can get 256-bit 8 element types here without VLX being enabled. When // this happens we will use 512-bit operations and the mask will not be // zero extended. EVT OpVT = N->getOperand(0).getValueType(); - // The first operand of X86ISD::CMPM is chain, so we need to get the second - // operand. - if (Opcode == X86ISD::CMPM) + // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the + // second operand. + if (Opcode == X86ISD::STRICT_CMPM) OpVT = N->getOperand(1).getValueType(); if (OpVT.is256BitVector() || OpVT.is128BitVector()) return Subtarget->hasVLX(); @@ -827,10 +828,10 @@ unsigned NewOpc; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode!"); - case ISD::STRICT_FP_TO_SINT: - case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; - case ISD::STRICT_FP_TO_UINT: - case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break; + case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break; + case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; } SDValue Res; if (N->isStrictFPOpcode()) @@ -839,8 +840,8 @@ {N->getOperand(0), N->getOperand(1)}); else Res = - CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other}, - {CurDAG->getEntryNode(), N->getOperand(0)}); + CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), + N->getOperand(0)); --I; if (N->isStrictFPOpcode()) { SDValue From[] = {SDValue(N, 0), SDValue(N, 1)}; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -325,6 +325,7 @@ // Vector packed double/float comparison. CMPP, + STRICT_CMPP, // Vector integer comparisons. PCMPEQ, PCMPGT, @@ -337,6 +338,7 @@ /// Vector comparison generating mask bits for fp and /// integer signed and unsigned data types. CMPM, + STRICT_CMPM, // Vector comparison with SAE for FP values CMPM_SAE, @@ -504,6 +506,7 @@ // Vector float/double to signed/unsigned integer with truncation. CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE, + STRICT_CVTTP2SI, STRICT_CVTTP2UI, // Scalar float/double to signed/unsigned integer with truncation. CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19782,14 +19782,16 @@ return Res; } SDValue Res, Chain; - unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; if (IsStrict) { + unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI + : X86ISD::STRICT_CVTTP2UI; Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src}); Chain = Res.getValue(1); - } else - Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, - {DAG.getEntryNode(), Src}); + } else { + unsigned Opc = IsSigned ? 
X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; + Res = DAG.getNode(Opc, dl, ResVT, Src); + } Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res, DAG.getIntPtrConstant(0, dl)); @@ -19802,14 +19804,13 @@ if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32)); - unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; - SDValue Res, Chain; if (IsStrict) { - Res = DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp}); - Chain = Res.getValue(1); - return DAG.getMergeValues({Res, Chain}, dl); + unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI + : X86ISD::STRICT_CVTTP2UI; + return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp}); } - return DAG.getNode(Opc, dl, {VT, MVT::Other}, {DAG.getEntryNode(), Tmp}); + unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; + return DAG.getNode(Opc, dl, VT, Tmp); } return SDValue(); @@ -20972,8 +20973,6 @@ SelectionDAG &DAG) { bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC || Op.getOpcode() == ISD::STRICT_FSETCCS; - bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; - SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0); SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1); SDValue CC = Op.getOperand(IsStrict ? 3 : 2); @@ -20988,12 +20987,15 @@ assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif + bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); + unsigned Opc; if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16); - Opc = X86ISD::CMPM; + Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM; } else { - Opc = X86ISD::CMPP; + Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP; // The SSE/AVX packed FP comparison nodes are defined with a // floating-point vector result that matches the operand type. This allows // them to work with an SSE1 target (integer vector types are not legal). 
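// Illustrative sketch, not part of the patch: the hunks above split the
// formerly shared FP-to-int and FP-compare opcodes into chained STRICT_*
// variants and chain-free regular variants. A minimal illustration of the
// emission pattern used throughout the patch; the helper name and signature
// below are hypothetical, only the getNode calls mirror the hunks:
static SDValue emitCvttToSInt(SelectionDAG &DAG, const SDLoc &dl, EVT ResVT,
                              SDValue Src, SDValue Chain, bool IsStrict) {
  if (IsStrict) {
    // Strict nodes produce {value, chain} and consume the incoming chain so
    // the FP-exception side effect stays ordered.
    return DAG.getNode(X86ISD::STRICT_CVTTP2SI, dl, {ResVT, MVT::Other},
                       {Chain, Src});
  }
  // Regular nodes are plain value nodes again: no chain operand and no chain
  // result, so later combines need not thread an artificial entry token.
  return DAG.getNode(X86ISD::CVTTP2SI, dl, ResVT, Src);
}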
@@ -21044,37 +21046,51 @@ CombineOpc = X86ISD::FAND; } - SDValue Cmp0 = DAG.getNode( - Opc, dl, {VT, MVT::Other}, - {Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)}); - SDValue Cmp1 = DAG.getNode( - Opc, dl, {VT, MVT::Other}, - {Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)}); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1), - Cmp1.getValue(1)); + SDValue Cmp0, Cmp1; + if (IsStrict) { + Cmp0 = DAG.getNode( + Opc, dl, {VT, MVT::Other}, + {Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)}); + Cmp1 = DAG.getNode( + Opc, dl, {VT, MVT::Other}, + {Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)}); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1), + Cmp1.getValue(1)); + } else { + Cmp0 = DAG.getNode( + Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)); + Cmp1 = DAG.getNode( + Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)); + } Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } else { - Cmp = DAG.getNode( - Opc, dl, {VT, MVT::Other}, - {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)}); - Chain = Cmp.getValue(1); + if (IsStrict) { + Cmp = DAG.getNode( + Opc, dl, {VT, MVT::Other}, + {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)}); + Chain = Cmp.getValue(1); + } else + Cmp = DAG.getNode( + Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)); } } else { // Handle all other FP comparisons here. - if (IsStrict) + if (IsStrict) { // Make a flip on already signaling CCs before setting bit 4 of AVX CC. SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4; - Cmp = DAG.getNode( - Opc, dl, {VT, MVT::Other}, - {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)}); - Chain = Cmp.getValue(1); + Cmp = DAG.getNode( + Opc, dl, {VT, MVT::Other}, + {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)}); + Chain = Cmp.getValue(1); + } else + Cmp = DAG.getNode( + Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)); } // If this is SSE/AVX CMPP, bitcast the result back to integer to match the // result type of SETCC. The bitcast is expected to be optimized away // during combining/isel. - if (Opc == X86ISD::CMPP) - Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp); + Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp); if (IsStrict) return DAG.getMergeValues({Cmp, Chain}, dl); @@ -23151,26 +23167,6 @@ return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset); } -// We share some nodes between STRICT and non STRICT FP intrinsics. -// For these nodes, we need chain them to entry token if they are not called -// by STRICT FP intrinsics. -static SDValue getProperNode(unsigned Opcode, const SDLoc &dl, EVT VT, - ArrayRef Ops, SelectionDAG &DAG) { - switch (Opcode) { - default: - return DAG.getNode(Opcode, dl, VT, Ops); - case X86ISD::CVTTP2SI: - case X86ISD::CVTTP2UI: - case X86ISD::CMPP: - case X86ISD::CMPM: - break; - } - - SmallVector NewOps = {DAG.getEntryNode()}; - NewOps.append(Ops.begin(), Ops.end()); - return DAG.getNode(Opcode, dl, {VT, MVT::Other}, NewOps); -} - SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { // Helper to detect if the operand is CUR_DIRECTION rounding mode. 
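// Illustrative sketch, not part of the patch: getProperNode() above existed
// only because CVTTP2SI/CVTTP2UI/CMPP/CMPM unconditionally carried a chain,
// so the non-strict intrinsic paths had to splice in the entry token and
// return a two-result node. With the chain-free opcodes restored, the
// INTRINSIC_WO_CHAIN cases in the hunks below reduce to a direct getNode
// call; the helper below is hypothetical and only shows that shape:
static SDValue lowerUnmaskedCvtt(SelectionDAG &DAG, const SDLoc &dl,
                                 unsigned Opc, EVT VT, SDValue Src) {
  // No entry-token chain and no MVT::Other result are needed any more, which
  // also lets the masked-move patterns fold (see the test updates below).
  return DAG.getNode(Opc, dl, VT, Src);
}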
@@ -23232,8 +23228,8 @@ if (!isRoundModeCurDirection(Rnd)) return SDValue(); } - return getProperNode(IntrData->Opc0, dl, Op.getValueType(), - Op.getOperand(1), DAG); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(1)); } case INTR_TYPE_1OP_SAE: { SDValue Sae = Op.getOperand(2); @@ -23304,8 +23300,8 @@ return SDValue(); } - return getProperNode(IntrData->Opc0, dl, Op.getValueType(), - {Src1, Src2, Src3}, DAG); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + {Src1, Src2, Src3}); } case INTR_TYPE_4OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), @@ -23330,7 +23326,7 @@ return SDValue(); } return getVectorMaskingNode( - getProperNode(IntrData->Opc0, dl, VT, Src, DAG), Mask, PassThru, + DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_1OP_MASK_SAE: { @@ -23347,8 +23343,8 @@ else return SDValue(); - return getVectorMaskingNode(getProperNode(Opc, dl, VT, Src, DAG), Mask, - PassThru, Subtarget, DAG); + return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru, + Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK: { SDValue Src1 = Op.getOperand(1); @@ -23554,8 +23550,8 @@ return SDValue(); } //default rounding mode - return getProperNode(IntrData->Opc0, dl, MaskVT, - {Op.getOperand(1), Op.getOperand(2), CC}, DAG); + return DAG.getNode(IntrData->Opc0, dl, MaskVT, + {Op.getOperand(1), Op.getOperand(2), CC}); } case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); @@ -23750,13 +23746,13 @@ SDValue Mask = Op.getOperand(3); if (isAllOnesConstant(Mask)) - return getProperNode(IntrData->Opc0, dl, Op.getValueType(), Src, DAG); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); MVT SrcVT = Src.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); - return getProperNode(IntrData->Opc1, dl, Op.getValueType(), - {Src, PassThru, Mask}, DAG); + return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), + {Src, PassThru, Mask}); } case CVTPS2PH_MASK: { SDValue Src = Op.getOperand(1); @@ -28666,16 +28662,18 @@ // legalization to v8i32<-v8f64. return; } - unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; SDValue Res; SDValue Chain; if (IsStrict) { + unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI + : X86ISD::STRICT_CVTTP2UI; Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other}, {N->getOperand(0), Src}); Chain = Res.getValue(1); - } else - Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other}, - {DAG.getEntryNode(), Src}); + } else { + unsigned Opc = IsSigned ? 
X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; + Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); + } Results.push_back(Res); if (IsStrict) Results.push_back(Chain); @@ -29114,6 +29112,7 @@ case X86ISD::COMI: return "X86ISD::COMI"; case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::CMPM: return "X86ISD::CMPM"; + case X86ISD::STRICT_CMPM: return "X86ISD::STRICT_CMPM"; case X86ISD::CMPM_SAE: return "X86ISD::CMPM_SAE"; case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; @@ -29221,6 +29220,7 @@ case X86ISD::VROTRI: return "X86ISD::VROTRI"; case X86ISD::VPPERM: return "X86ISD::VPPERM"; case X86ISD::CMPP: return "X86ISD::CMPP"; + case X86ISD::STRICT_CMPP: return "X86ISD::STRICT_CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS"; @@ -29382,6 +29382,8 @@ case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI"; case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI"; + case X86ISD::STRICT_CVTTP2SI: return "X86ISD::STRICT_CVTTP2SI"; + case X86ISD::STRICT_CVTTP2UI: return "X86ISD::STRICT_CVTTP2UI"; case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI"; case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI"; case X86ISD::CVTTP2SI_SAE: return "X86ISD::CVTTP2SI_SAE"; @@ -34783,6 +34785,7 @@ break; case X86ISD::CVTP2SI: case X86ISD::CVTP2UI: case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI: + case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI: case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI: case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P: @@ -34791,8 +34794,8 @@ In.getOperand(0).getValueType() == MVT::v2i64) return N->getOperand(0); // return the bitcast break; - case X86ISD::CVTTP2SI: - case X86ISD::CVTTP2UI: + case X86ISD::STRICT_CVTTP2SI: + case X86ISD::STRICT_CVTTP2UI: if (In.getOperand(1).getValueType() == MVT::v2f64 || In.getOperand(1).getValueType() == MVT::v2i64) return N->getOperand(0); @@ -42497,14 +42500,11 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + // FIXME: Handle strict fp nodes. EVT VT = N->getValueType(0); // Convert a full vector load into vzload when not all bits are needed. 
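// Illustrative sketch, not part of the patch: combineCVTP2I_CVTTP2I now only
// sees the chain-free CVT(T)P2SI/UI nodes (strict variants are left alone per
// the FIXME above), so the FP source is always operand 0 and the vzload fold
// in the hunk that follows replaces a single value instead of a {value, chain}
// pair. Hypothetical helper showing that shape, with VZLoad/InVT/VT taken
// from the surrounding hunk:
static SDValue foldCvttLoadToVZLoad(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SDValue VZLoad, MVT InVT, EVT VT,
                                    const SDLoc &dl) {
  // With the chain-free nodes there is no second (chain) result to rewire.
  SDValue Convert =
      DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
  DCI.CombineTo(N, Convert);
  return SDValue(N, 0);
}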
- SDValue In; - if (N->getOpcode() == X86ISD::CVTTP2SI || N->getOpcode() == X86ISD::CVTTP2UI) - In = N->getOperand(1); - else - In = N->getOperand(0); + SDValue In = N->getOperand(0); MVT InVT = In.getSimpleValueType(); if (VT.getVectorNumElements() < InVT.getVectorNumElements() && ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { @@ -42523,13 +42523,9 @@ LN->getPointerInfo(), LN->getAlignment(), LN->getMemOperand()->getFlags()); - SDValue Convert = getProperNode(N->getOpcode(), dl, VT, - DAG.getBitcast(InVT, VZLoad), DAG); - if (Convert->getOpcode() == X86ISD::CVTTP2SI || - Convert->getOpcode() == X86ISD::CVTTP2UI) - DCI.CombineTo(N, Convert.getValue(0), Convert.getValue(1)); - else - DCI.CombineTo(N, Convert); + SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT, + DAG.getBitcast(InVT, VZLoad)); + DCI.CombineTo(N, Convert); DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); return SDValue(N, 0); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2528,7 +2528,7 @@ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", - (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), + (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1>, Sched<[sched]>; @@ -2536,8 +2536,8 @@ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", - (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), - timm:$cc), + (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), + timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc)>, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -2548,9 +2548,9 @@ "vcmp"#_.Suffix, "$cc, ${src2}"#_.BroadcastStr#", $src1", "$src1, ${src2}"#_.BroadcastStr#", $cc", - (X86cmpm (_.VT _.RC:$src1), - (_.VT (_.BroadcastLdFrag addr:$src2)), - timm:$cc), + (X86any_cmpm (_.VT _.RC:$src1), + (_.VT (_.BroadcastLdFrag addr:$src2)), + timm:$cc), (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc)>, @@ -2558,8 +2558,8 @@ } // Patterns for selecting with loads in other operand. - def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - timm:$cc), + def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), + timm:$cc), (!cast(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; @@ -2570,8 +2570,8 @@ _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; - def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2), - (_.VT _.RC:$src1), timm:$cc), + def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), + (_.VT _.RC:$src1), timm:$cc), (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; @@ -3201,8 +3201,8 @@ multiclass axv512_cmp_packed_cc_no_vlx_lowering { -def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc)), +def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -3219,8 +3219,8 @@ timm:$cc), Narrow.KRC)>; // Broadcast load. 
-def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), - (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), +def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbi") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -3235,8 +3235,8 @@ addr:$src2, timm:$cc), Narrow.KRC)>; // Commuted with broadcast load. -def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), - (Narrow.VT Narrow.RC:$src1), timm:$cc)), +def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbi") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -8115,19 +8115,19 @@ X86VSintToFpRnd, SchedWriteCvtDQ2PS>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si, +defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, X86cvttp2siSAE, SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si, +defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, X86cvttp2siSAE, SchedWriteCvtPD2DQ>, PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui, +defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui, +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, PS, VEX_W, EVEX_CD8<64, CD8VF>; @@ -8171,19 +8171,19 @@ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; -defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si, +defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si, +defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; -defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui, +defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui, +defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD, EVEX_CD8<32, CD8VH>; @@ -8235,7 +8235,7 @@ // Special patterns to allow use of X86mcvttp2si for masking. Instruction // patterns have been disabled with null_frag. 
- def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))), + def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))), (VCVTTPD2DQZ128rr VR128X:$src)>; def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), VK2WM:$mask), @@ -8244,7 +8244,7 @@ VK2WM:$mask), (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; - def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))), + def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))), (VCVTTPD2DQZ128rm addr:$src)>; def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0), VK2WM:$mask), @@ -8253,7 +8253,7 @@ VK2WM:$mask), (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), + def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2DQZ128rmb addr:$src)>; def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), @@ -8293,7 +8293,7 @@ // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction // patterns have been disabled with null_frag. - def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))), + def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))), (VCVTTPD2UDQZ128rr VR128X:$src)>; def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), VK2WM:$mask), @@ -8302,7 +8302,7 @@ VK2WM:$mask), (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; - def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))), + def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))), (VCVTTPD2UDQZ128rm addr:$src)>; def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), VK2WM:$mask), @@ -8311,7 +8311,7 @@ VK2WM:$mask), (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), + def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2UDQZ128rmb addr:$src)>; def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), @@ -8344,7 +8344,7 @@ v2i64x_info.ImmAllZerosV)), (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), + def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTTPS2QQZ128rm addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), @@ -8355,7 +8355,7 @@ v2i64x_info.ImmAllZerosV)), (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), + def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTTPS2UQQZ128rm addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), @@ -8368,17 +8368,17 @@ } let Predicates = [HasAVX512, NoVLX] in { -def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))), +def : Pat<(v8i32 (X86any_cvttp2ui (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; -def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))), +def : Pat<(v4i32 (X86any_cvttp2ui (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; -def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))), +def : Pat<(v4i32 (X86any_cvttp2ui (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; @@ -8489,32 +8489,32 @@ } let Predicates = [HasDQI, NoVLX] in { 
-def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))), +def : Pat<(v2i64 (X86any_cvttp2si (v2f64 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; -def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))), +def : Pat<(v4i64 (X86any_cvttp2si (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; -def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))), +def : Pat<(v4i64 (X86any_cvttp2si (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; -def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))), +def : Pat<(v2i64 (X86any_cvttp2ui (v2f64 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; -def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))), +def : Pat<(v4i64 (X86any_cvttp2ui (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; -def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))), +def : Pat<(v4i64 (X86any_cvttp2ui (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -169,10 +169,15 @@ def X86vshldq : SDNode<"X86ISD::VSHLDQ", X86vshiftimm>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", X86vshiftimm>; -def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP, [SDNPHasChain]>; def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; +def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; +def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>; +def X86any_cmpp : PatFrags<(ops node:$src1, node:$src2, node:$src3), + [(X86strict_cmpp node:$src1, node:$src2, node:$src3), + (X86cmpp node:$src1, node:$src2, node:$src3)]>; + def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameAs<2, 1>, @@ -181,7 +186,11 @@ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPHasChain]>; +def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>; +def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3), + [(X86strict_cmpm node:$src1, node:$src2, node:$src3), + (X86cmpm node:$src1, node:$src2, node:$src3)]>; def X86cmpmSAE : SDNode<"X86ISD::CMPM_SAE", X86CmpMaskCC>; def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>; def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>; @@ -623,8 +632,16 @@ // Vector without rounding mode // cvtt fp-to-int staff -def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>; -def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>; +def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>; +def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>; +def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>; +def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>; 
+def X86any_cvttp2si : PatFrags<(ops node:$src), + [(X86strict_cvttp2si node:$src), + (X86cvttp2si node:$src)]>; +def X86any_cvttp2ui : PatFrags<(ops node:$src), + [(X86strict_cvttp2ui node:$src), + (X86cvttp2ui node:$src)]>; def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>; def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1528,22 +1528,22 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>, + (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (loadv4f32 addr:$src))))]>, + (v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>, VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (v8i32 (X86cvttp2si (v8f32 VR256:$src))))]>, + (v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (v8i32 (X86cvttp2si (loadv8f32 addr:$src))))]>, + (v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG; } @@ -1551,12 +1551,12 @@ def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (v4f32 VR128:$src))))]>, + (v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>, Sched<[WriteCvtPS2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>, + (v4i32 (X86any_cvttp2si (memopv4f32 addr:$src))))]>, Sched<[WriteCvtPS2ILd]>; } @@ -1568,24 +1568,24 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, + (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>, VEX, Sched<[WriteCvtPD2I]>, VEX_WIG; def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dq{x}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>, + (v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>, VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (v4f64 VR256:$src))))]>, + (v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>, + (v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>, VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG; } // Predicates = [HasAVX, NoVLX] @@ -1604,12 +1604,12 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - 
(v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, + (v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>, Sched<[WriteCvtPD2I]>, SIMD_EXC; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>, + (v4i32 (X86any_cvttp2si (memopv2f64 addr:$src))))]>, Sched<[WriteCvtPD2ILd]>, SIMD_EXC; // Convert packed single to packed double @@ -1925,12 +1925,12 @@ let isCommutable = 1 in def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, - [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, + [(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, Sched<[sched]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, [(set RC:$dst, - (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, + (VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -1963,20 +1963,20 @@ // Patterns to select compares with loads in first operand. let Predicates = [HasAVX] in { - def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1, - CommutableCMPCC:$cc)), + def : Pat<(v4f64 (X86any_cmpp (loadv4f64 addr:$src2), VR256:$src1, + CommutableCMPCC:$cc)), (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>; - def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1, - CommutableCMPCC:$cc)), + def : Pat<(v8f32 (X86any_cmpp (loadv8f32 addr:$src2), VR256:$src1, + CommutableCMPCC:$cc)), (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>; - def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1, - CommutableCMPCC:$cc)), + def : Pat<(v2f64 (X86any_cmpp (loadv2f64 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; - def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1, - CommutableCMPCC:$cc)), + def : Pat<(v4f32 (X86any_cmpp (loadv4f32 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, @@ -1989,8 +1989,8 @@ } let Predicates = [UseSSE2] in { - def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1, - CommutableCMPCC:$cc)), + def : Pat<(v2f64 (X86any_cmpp (memopv2f64 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, @@ -1999,8 +1999,8 @@ } let Predicates = [UseSSE1] in { - def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1, - CommutableCMPCC:$cc)), + def : Pat<(v4f32 (X86any_cmpp (memopv4f32 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -4322,21 +4322,17 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512: ; X64: # %bb.0: -; X64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttpd2dq %zmm0, %ymm2 -; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X64-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1} ; X64-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X86-LABEL: 
test_int_x86_avx512_mask_cvtt_pd2dq_512: ; X86: # %bb.0: -; X86-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttpd2dq %zmm0, %ymm2 -; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X86-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1} ; X86-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl @@ -4377,21 +4373,17 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: ; X64: # %bb.0: -; X64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttpd2udq %zmm0, %ymm2 -; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X64-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} ; X64-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: ; X86: # %bb.0: -; X86-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovw %eax, %k1 -; X86-NEXT: vcvttpd2udq %zmm0, %ymm2 -; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X86-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} ; X86-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl @@ -4407,8 +4399,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2dq %zmm0, %zmm2 -; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X64-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1} ; X64-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0 ; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; X64-NEXT: retq @@ -4416,8 +4407,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vcvttps2dq %zmm0, %zmm2 -; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X86-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1} ; X86-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0 ; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; X86-NEXT: retl @@ -4433,8 +4423,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vcvttps2udq %zmm0, %zmm2 -; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X64-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1} ; X64-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0 ; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; X64-NEXT: retq @@ -4442,8 +4431,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vcvttps2udq %zmm0, %zmm2 -; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} +; X86-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1} ; X86-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0 ; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -286,8 +286,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8] ; X86-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: 
[0xc3] @@ -295,8 +294,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0] -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8] ; X64-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -312,8 +310,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8] ; X86-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -321,8 +318,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0] -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8] ; X64-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -338,8 +334,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8] ; X86-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -347,8 +342,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0] -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8] ; X64-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] @@ -364,8 +358,7 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512: ; X86: # %bb.0: ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0] -; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X86-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8] ; 
X86-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0] ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] @@ -373,8 +366,7 @@ ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0] -; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca] +; X64-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8] ; X64-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0] ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -599,17 +599,17 @@ define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128: ; X86: # %bb.0: -; X86-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8] +; X86-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8] +; X86-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8] ; X64-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0] -; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) @@ -623,17 +623,17 @@ define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256: ; X86: # %bb.0: -; X86-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8] +; X86-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8] +; X86-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8] ; X64-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0] -; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8] ; X64-NEXT: vpaddq 
%ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) @@ -647,17 +647,17 @@ define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128: ; X86: # %bb.0: -; X86-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8] +; X86-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8] +; X86-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8] ; X64-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0] -; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) @@ -671,17 +671,17 @@ define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256: ; X86: # %bb.0: -; X86-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8] +; X86-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8] +; X86-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0] ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8] ; X64-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0] -; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8] ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) @@ -695,17 +695,17 @@ define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128: ; X86: # %bb.0: -; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8] +; X86-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8] +; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: 
[0x62,0xf1,0x7d,0x08,0x7a,0xc0] ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8] ; X64-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0] -; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8] ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) @@ -735,16 +735,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> @@ -756,16 +754,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> @@ -795,16 +791,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: 
test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> @@ -816,16 +810,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07] -; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <2 x float>, <2 x float>* %p %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> @@ -853,16 +845,14 @@ ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f] -; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07] ; X64-NEXT: retq # encoding: [0xc3] %x0 = load <4 x float>, <4 x float>* %p %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask) @@ -873,16 +863,14 @@ ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00] ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3: ; X64: # %bb.0: ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07] -; X64-NEXT: 
vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <4 x float>, <4 x float>* %p
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
@@ -894,17 +882,17 @@
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; X86: # %bb.0:
-; X86-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
+; X86-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
+; X86-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
; X64-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
-; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
@@ -995,17 +983,17 @@
define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X86: # %bb.0:
-; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
@@ -1035,16 +1023,14 @@
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32>
@@ -1056,16 +1042,14 @@
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32>
@@ -1095,16 +1079,14 @@
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32>
@@ -1116,16 +1098,14 @@
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32>
@@ -1153,16 +1133,14 @@
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <4 x float>, <4 x float>* %p
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
@@ -1173,16 +1151,14 @@
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <4 x float>, <4 x float>* %p
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
@@ -1194,17 +1170,17 @@
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X86: # %bb.0:
-; X86-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; X64-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
-; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
@@ -682,17 +682,15 @@
; X86-LABEL: test_mm256_mask_cvttpd_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttpd2dq %ymm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvttpd_epi32:
; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttpd2dq %ymm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
@@ -709,17 +707,15 @@
; X86-LABEL: test_mm256_maskz_cvttpd_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttpd2dq %ymm0, %xmm0
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvttpd_epi32:
; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttpd2dq %ymm0, %xmm0
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
@@ -798,16 +794,14 @@
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttpd2udq %ymm1, %xmm1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvttpd_epu32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttpd2udq %ymm1, %xmm1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
@@ -822,16 +816,14 @@
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttpd2udq %ymm0, %xmm0
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvttpd_epu32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttpd2udq %ymm0, %xmm0
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
@@ -844,16 +836,14 @@
; X86-LABEL: test_mm_mask_cvttps_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_cvttps_epi32:
; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %xmm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8
@@ -869,16 +859,14 @@
; X86-LABEL: test_mm_maskz_cvttps_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %xmm0, %xmm0
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_cvttps_epi32:
; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %xmm0, %xmm0
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8
@@ -893,16 +881,14 @@
; X86-LABEL: test_mm256_mask_cvttps_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %ymm1, %ymm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X86-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvttps_epi32:
; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %ymm1, %ymm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X64-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8
@@ -917,16 +903,14 @@
; X86-LABEL: test_mm256_maskz_cvttps_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %ymm0, %ymm0
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvttps_epi32:
; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %ymm0, %ymm0
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8
@@ -952,15 +936,13 @@
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %xmm1, %xmm1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_cvttps_epu32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %xmm1, %xmm1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__W to <4 x i32>
@@ -974,15 +956,13 @@
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %xmm0, %xmm0
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_cvttps_epu32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %xmm0, %xmm0
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 %__U) #8
@@ -1006,15 +986,13 @@
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %ymm1, %ymm1
-; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X86-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvttps_epu32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %ymm1, %ymm1
-; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X64-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__W to <8 x i32>
@@ -1028,15 +1006,13 @@
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %ymm0, %ymm0
-; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvttps_epu32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %ymm0, %ymm0
-; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 %__U) #8
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -10374,20 +10374,20 @@
define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; X86: # %bb.0:
-; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
+; X86-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X86-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
+; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; X64: # %bb.0:
-; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
+; X64-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X64-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
+; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
@@ -10401,19 +10401,19 @@
define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; X86: # %bb.0:
-; X86-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
+; X86-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X86-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
+; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; X64: # %bb.0:
-; X64-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
+; X64-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X64-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
+; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
@@ -10426,19 +10426,19 @@
define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; X86: # %bb.0:
-; X86-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
+; X86-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
-; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; X86-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
+; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; X64: # %bb.0:
-; X64-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
+; X64-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
-; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; X64-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
+; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -3704,8 +3704,8 @@
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
@@ -3713,8 +3713,8 @@
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
@@ -3731,16 +3731,16 @@
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
; X86-NEXT: vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
; X64-NEXT: vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
@@ -3756,16 +3756,16 @@
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
; X86-NEXT: vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
-; X86-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
; X64-NEXT: vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
-; X64-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)