Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -442,10 +442,45 @@
     bool foldLoadStoreIntoMemOperand(SDNode *Node);
     bool matchBEXTRFromAnd(SDNode *Node);
+
+    bool isMaskZeroExtended(SDNode *N) const;
   };
 }
 
+// Returns true if this masked compare can be implemented legally with this
+// type.
+static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
+  if (N->getOpcode() == X86ISD::PCMPEQM ||
+      N->getOpcode() == X86ISD::PCMPGTM ||
+      N->getOpcode() == X86ISD::CMPM ||
+      N->getOpcode() == X86ISD::CMPMU) {
+    // We can get 256-bit 8 element types here without VLX being enabled. When
+    // this happens we will use 512-bit operations and the mask will not be
+    // zero extended.
+    if (N->getOperand(0).getValueType() == MVT::v8i32 ||
+        N->getOperand(0).getValueType() == MVT::v8f32)
+      return Subtarget->hasVLX();
+
+    return true;
+  }
+
+  return false;
+}
+
+// Returns true if we can assume the writer of the mask has zero extended it
+// for us.
+bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
+  // If this is an AND, check if we have a compare on either side. As long as
+  // one side guarantees the mask is zero extended, the AND will preserve those
+  // zeros.
+  if (N->getOpcode() == ISD::AND)
+    return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
+           isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
+
+  return isLegalMaskCompare(N, Subtarget);
+}
+
 bool X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
                                          SDNode *Root) const {
   if (OptLevel == CodeGenOpt::None) return false;
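To see the shape of source code this predicate targets, here is a minimal user-level sketch (hypothetical: the function name use_widened_mask and its surrounding usage are illustrative, not part of the patch; the intrinsics are standard AVX-512F/VL ones). VPCMPEQD writes its 8-bit result into a k register with bits 8..15 already zeroed, so isMaskZeroExtended should let the widening cast become a plain k-register copy instead of a KSHIFTLW/KSHIFTRW pair:

    #include <immintrin.h>

    // Hypothetical example, assuming AVX512F+VL: the 256-bit compare
    // zero-extends its mask in the k register, so the (__mmask16) cast
    // below should fold away under the new patterns.
    __m512i use_widened_mask(__m256i a, __m256i b, __m512i src, __m512i v) {
      __mmask16 m = (__mmask16)_mm256_cmpeq_epi32_mask(a, b);
      return _mm512_mask_mov_epi32(src, m, v);  // consumes the widened mask
    }
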
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -1866,217 +1866,6 @@
                  T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
-multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
-                                       SDNode OpNode, string InstrStr,
-                                       list<Predicate> Preds> {
-let Predicates = Preds in {
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (OpNode (_.VT _.RC:$src1),
-                                             (_.VT (bitconvert (_.LdFrag addr:$src2))))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and _.KRCWM:$mask,
-                                          (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
-                                                  _.RC:$src1, _.RC:$src2),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and (_.KVT _.KRCWM:$mask),
-                                          (_.KVT (OpNode (_.VT _.RC:$src1),
-                                                         (_.VT (bitconvert
-                                                                (_.LdFrag addr:$src2))))))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
-                                                  _.RC:$src1, addr:$src2),
-                              NewInf.KRC)>;
-}
-}
-
-multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
-                                           SDNode OpNode, string InstrStr,
-                                           list<Predicate> Preds>
-  : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
-let Predicates = Preds in {
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (OpNode (_.VT _.RC:$src1),
-                                             (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and (_.KVT _.KRCWM:$mask),
-                                          (_.KVT (OpNode (_.VT _.RC:$src1),
-                                                         (X86VBroadcast
-                                                          (_.ScalarLdFrag addr:$src2)))))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
-                                                  _.RC:$src1, addr:$src2),
-                              NewInf.KRC)>;
-}
-}
-
-// VPCMPEQB - i8
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-
-defm : avx512_icmp_packed_lowering;
-
-// VPCMPEQW - i16
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-
-defm : avx512_icmp_packed_lowering;
-
-// VPCMPEQD - i32
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-// VPCMPEQQ - i64
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-// VPCMPGTB - i8
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-
-defm : avx512_icmp_packed_lowering;
-
-// VPCMPGTW - i16
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-
-defm : avx512_icmp_packed_lowering;
-defm : avx512_icmp_packed_lowering;
-
-defm : avx512_icmp_packed_lowering;
-
-// VPCMPGTD - i32
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-// VPCMPGTQ - i64
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
-defm : avx512_icmp_packed_rmb_lowering;
 
 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                           X86VectorVTInfo _> {
   let isCommutable = 1 in
@@ -2238,236 +2027,6 @@
 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu,
                                      avx512vl_i64_info, HasAVX512>,
                                      VEX_W, EVEX_CD8<64, CD8VF>;
 
-multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
-                                          SDNode OpNode, string InstrStr,
-                                          list<Predicate> Preds> {
-let Predicates = Preds in {
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (OpNode (_.VT _.RC:$src1),
-                                             (_.VT _.RC:$src2),
-                                             imm:$cc)),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
-                                                                 _.RC:$src2,
-                                                                 imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (OpNode (_.VT _.RC:$src1),
-                                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
-                                             imm:$cc)),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
-                                                                 addr:$src2,
-                                                                 imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and _.KRCWM:$mask,
-                                          (OpNode (_.VT _.RC:$src1),
-                                                  (_.VT _.RC:$src2),
-                                                  imm:$cc))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
-                                                                  _.RC:$src1,
-                                                                  _.RC:$src2,
-                                                                  imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and (_.KVT _.KRCWM:$mask),
-                                          (_.KVT (OpNode (_.VT _.RC:$src1),
-                                                         (_.VT (bitconvert
-                                                                (_.LdFrag addr:$src2))),
-                                                         imm:$cc)))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
-                                                                  _.RC:$src1,
-                                                                  addr:$src2,
-                                                                  imm:$cc),
-                              NewInf.KRC)>;
-}
-}
-
-multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
-                                              SDNode OpNode, string InstrStr,
-                                              list<Predicate> Preds>
-  : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
-let Predicates = Preds in {
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (OpNode (_.VT _.RC:$src1),
-                                             (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
-                                             imm:$cc)),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
-                                                                  addr:$src2,
-                                                                  imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and (_.KVT _.KRCWM:$mask),
-                                          (_.KVT (OpNode (_.VT _.RC:$src1),
-                                                         (X86VBroadcast
-                                                          (_.ScalarLdFrag addr:$src2)),
-                                                         imm:$cc)))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
-                                                                   _.RC:$src1,
-                                                                   addr:$src2,
-                                                                   imm:$cc),
-                              NewInf.KRC)>;
-}
-}
-
-// VPCMPB - i8
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-
-defm : avx512_icmp_cc_packed_lowering;
-
-// VPCMPW - i16
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-
-defm : avx512_icmp_cc_packed_lowering;
-
-// VPCMPD - i32
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-// VPCMPQ - i64
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-// VPCMPUB - i8
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-
-defm : avx512_icmp_cc_packed_lowering;
-
-// VPCMPUW - i16
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-
-defm : avx512_icmp_cc_packed_lowering;
-defm : avx512_icmp_cc_packed_lowering;
-
-defm : avx512_icmp_cc_packed_lowering;
-
-// VPCMPUD - i32
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-// VPCMPUQ - i64
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
-defm : avx512_icmp_cc_packed_rmb_lowering;
 
 multiclass avx512_vcmp_common<X86VectorVTInfo _> {
@@ -2559,159 +2118,6 @@
 defm VCMPPS : avx512_vcmp<avx512vl_f32_info>, AVX512PSIi8Base,
               EVEX_4V, EVEX_CD8<32, CD8VF>;
 
-multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
-                                          string InstrStr, list<Predicate> Preds> {
-let Predicates = Preds in {
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (X86cmpm (_.VT _.RC:$src1),
-                                              (_.VT _.RC:$src2),
-                                              imm:$cc)),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
-                                                                 _.RC:$src2,
-                                                                 imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and _.KRCWM:$mask,
-                                          (X86cmpm (_.VT _.RC:$src1),
-                                                   (_.VT _.RC:$src2),
-                                                   imm:$cc))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
-                                                                  _.RC:$src1,
-                                                                  _.RC:$src2,
-                                                                  imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (X86cmpm (_.VT _.RC:$src1),
-                                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
-                                              imm:$cc)),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
-                                                                 addr:$src2,
-                                                                 imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and _.KRCWM:$mask,
-                                          (X86cmpm (_.VT _.RC:$src1),
-                                                   (_.VT (bitconvert
-                                                          (_.LdFrag addr:$src2))),
-                                                   imm:$cc))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
-                                                                  _.RC:$src1,
-                                                                  addr:$src2,
-                                                                  imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (X86cmpm (_.VT _.RC:$src1),
-                                              (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
-                                              imm:$cc)),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
-                                                                  addr:$src2,
-                                                                  imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and _.KRCWM:$mask,
-                                          (X86cmpm (_.VT _.RC:$src1),
-                                                   (X86VBroadcast
-                                                    (_.ScalarLdFrag addr:$src2)),
-                                                   imm:$cc))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbik) _.KRCWM:$mask,
-                                                                   _.RC:$src1,
-                                                                   addr:$src2,
-                                                                   imm:$cc),
-                              NewInf.KRC)>;
-}
-}
-
-multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
-                                              string InstrStr, list<Predicate> Preds>
-  : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
-
-let Predicates = Preds in
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
-                                                 (_.VT _.RC:$src2),
-                                                 imm:$cc,
-                                                 (i32 FROUND_NO_EXC))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
-                                                                  _.RC:$src2,
-                                                                  imm:$cc),
-                              NewInf.KRC)>;
-
-  def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
-                              (_.KVT (and _.KRCWM:$mask,
-                                          (X86cmpmRnd (_.VT _.RC:$src1),
-                                                      (_.VT _.RC:$src2),
-                                                      imm:$cc,
-                                                      (i32 FROUND_NO_EXC)))),
-                              (i64 0)),
-            (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rribk) _.KRCWM:$mask,
-                                                                   _.RC:$src1,
-                                                                   _.RC:$src2,
-                                                                   imm:$cc),
-                              NewInf.KRC)>;
-}
-
-
-// VCMPPS - f32
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-
-defm : avx512_fcmp_cc_packed_sae_lowering;
-defm : avx512_fcmp_cc_packed_sae_lowering;
-
-// VCMPPD - f64
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-defm : avx512_fcmp_cc_packed_lowering;
-
-defm : avx512_fcmp_cc_packed_sae_lowering;
-defm : avx512_fcmp_cc_packed_sae_lowering;
-defm : avx512_fcmp_cc_packed_sae_lowering;
 
 // ----------------------------------------------------------------
 //  FPClass
@@ -3211,24 +2617,6 @@
             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
          VK8)>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
-                            (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
-                            (i64 0)),
-          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
-            (i8 8)), (i8 8))>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
-                            (v8i1 (and VK8:$mask,
-                                       (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
-                            (i64 0)),
-          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
-            (COPY_TO_REGCLASS VK8:$mask, VK16),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
-            (i8 8)), (i8 8))>;
 }
 
 multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                  AVX512VLVectorVTInfo _> {
                                                   imm:$cc), VK8)>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
-                            (v8i1 (OpNode (_.info256.VT VR256X:$src1),
-                                          (_.info256.VT VR256X:$src2), imm:$cc)),
-                            (i64 0)),
-          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
-            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
-            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
-            imm:$cc),
-            (i8 8)), (i8 8))>;
-
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
-                            (v8i1 (and VK8:$mask,
-                                       (OpNode (_.info256.VT VR256X:$src1),
-                                               (_.info256.VT VR256X:$src2), imm:$cc))),
-                            (i64 0)),
-          (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
-            (COPY_TO_REGCLASS VK8:$mask, VK16),
-            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
-            (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
-            imm:$cc),
-            (i8 8)), (i8 8))>;
 }
 
 let Predicates = [HasAVX512, NoVLX] in {
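Before the X86InstrVecCompiler.td changes below, a sketch of the ISD::AND case that isMaskZeroExtended handles (hypothetical: the function and_of_compares and its extra operand are illustrative, not part of the patch; the intrinsics are standard AVX-512F ones). One AND operand is a legal mask compare, so its upper k-register bits are zero and the AND preserves those zeros; the widened mask should then match the new maskzeroupper patterns instead of requiring explicit zeroing:

    #include <immintrin.h>

    // Hypothetical example: only one side of the mask AND is a compare,
    // which is enough to guarantee the upper mask bits stay zero.
    __m512i and_of_compares(__m512i a, __m512i b, __mmask8 extra,
                            __m512i src, __m512i v) {
      __mmask8 m = _mm512_cmpeq_epi64_mask(a, b) & extra;
      return _mm512_mask_mov_epi32(src, (__mmask16)m, v);
    }
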
Index: lib/Target/X86/X86InstrVecCompiler.td
===================================================================
--- lib/Target/X86/X86InstrVecCompiler.td
+++ lib/Target/X86/X86InstrVecCompiler.td
@@ -419,3 +419,84 @@
 defm: subvector_zero_ellision;
 defm: subvector_zero_ellision;
 defm: subvector_zero_ellision;
+
+
+class maskzeroupper<ValueType vt, RegisterClass RC> :
+  PatLeaf<(vt RC:$src), [{
+    return isMaskZeroExtended(N);
+  }]>;
+
+def maskzeroupperv2i1  : maskzeroupper<v2i1,  VK2>;
+def maskzeroupperv4i1  : maskzeroupper<v4i1,  VK4>;
+def maskzeroupperv8i1  : maskzeroupper<v8i1,  VK8>;
+def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
+def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;
+
+// These patterns check whether we can depend on the upper bits of a mask
+// register being zeroed by the previous operation so that we can skip
+// zeroing them explicitly.
+let Predicates = [HasBWI] in {
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     maskzeroupperv8i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK8:$src, VK32)>;
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     maskzeroupperv16i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK16:$src, VK32)>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     maskzeroupperv8i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK8:$src, VK64)>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     maskzeroupperv16i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK16:$src, VK64)>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     maskzeroupperv32i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK32:$src, VK64)>;
+}
+
+let Predicates = [HasAVX512] in {
+  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+                                     maskzeroupperv8i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK8:$src, VK16)>;
+}
+
+let Predicates = [HasVLX] in {
+  def : Pat<(v4i1 (insert_subvector (v4i1 immAllZerosV),
+                                    maskzeroupperv2i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK2:$src, VK4)>;
+  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+                                    maskzeroupperv2i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK2:$src, VK8)>;
+  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
+                                    maskzeroupperv4i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK4:$src, VK8)>;
+  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+                                     maskzeroupperv2i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK2:$src, VK16)>;
+  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+                                     maskzeroupperv4i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK4:$src, VK16)>;
+}
+
+let Predicates = [HasBWI, HasVLX] in {
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     maskzeroupperv2i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK2:$src, VK32)>;
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     maskzeroupperv4i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK4:$src, VK32)>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     maskzeroupperv2i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK2:$src, VK64)>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     maskzeroupperv4i1:$src, (iPTR 0))),
+            (COPY_TO_REGCLASS VK4:$src, VK64)>;
+}
+
+// If the upper bits are not known to be zero, we have to fall back to
+// zeroing them explicitly with shifts.
+let Predicates = [HasAVX512, NoVLX] in {
+  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+                                     (v8i1 VK8:$mask), (iPTR 0))),
+            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
+                                    (i8 8)), (i8 8))>;
+}
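As a scalar model of that fallback (zero_upper8 is a hypothetical helper, not part of the patch): shifting a 16-bit mask left and then right by 8, exactly what the KSHIFTLWri/KSHIFTRWri pair above does, clears bits 8..15 when the producer of the v8i1 mask is unknown:

    #include <cstdint>

    // Hypothetical scalar model of the KSHIFTLW/KSHIFTRW zeroing: after
    // the two shifts only bits 0..7 of the mask survive.
    static inline uint16_t zero_upper8(uint16_t k) {
      return (uint16_t)((uint16_t)(k << 8) >> 8);
    }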