Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -9925,9 +9925,7 @@ Subtarget, DAG, DL); SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL); SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1; - return DAG.getSelect(DL, VT, VMask, - DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector), - ZeroVector); + return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask); } static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, @@ -22043,9 +22041,15 @@ if (isAllOnesConstant(Mask)) // return data as is return Op.getOperand(1); - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, - DataToCompress), - Mask, PassThru, Subtarget, DAG); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + + // Avoid false dependency. getConstant would assert on FP vector types. + if (PassThru.isUndef()) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + + return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru, + Mask); } case FIXUPIMMS: case FIXUPIMMS_MASKZ: Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -10546,7 +10546,7 @@ string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable, AVX5128IBase, + (null_frag)>, AVX5128IBase, Sched<[sched]>; let mayStore = 1, hasSideEffects = 0 in @@ -10568,6 +10568,13 @@ def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask), (!cast(Name#_.ZSuffix##mrk) addr:$dst, _.KRCWM:$mask, _.RC:$src)>; + + def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), + (!cast(Name#_.ZSuffix##rrk) + _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; + def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), + (!cast(Name#_.ZSuffix##rrkz) + _.KRCWM:$mask, 
_.RC:$src)>; } multiclass compress_by_elt_width opc, string OpcodeStr, @@ -10601,13 +10608,12 @@ string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable, AVX5128IBase, + (null_frag)>, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable, + (null_frag)>, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -10626,6 +10632,13 @@ (_.VT _.RC:$src0))), (!cast(Name#_.ZSuffix##rmk) _.RC:$src0, _.KRCWM:$mask, addr:$src)>; + + def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), + (!cast(Name#_.ZSuffix##rrk) + _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; + def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), + (!cast(Name#_.ZSuffix##rrkz) + _.KRCWM:$mask, _.RC:$src)>; } multiclass expand_by_elt_width opc, string OpcodeStr, Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -524,10 +524,14 @@ def X86ReducesRnd : SDNode<"X86ISD::VREDUCES_RND", SDTFPBinOpImmRound>; def X86GetMantsRnd : SDNode<"X86ISD::VGETMANTS_RND", SDTFPBinOpImmRound>; -def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1, - [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; -def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, - [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; +def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<0, 3>]>, []>; +def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<0, 3>]>, []>; // vpshufbitqmb def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",