Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35116,6 +35116,24 @@ if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget)) return V; + // Recognize the IR pattern for the movmsk intrinsic under SSE1 befoer type + // legalization destroys the v4i32 type. + if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && SrcVT == MVT::v4i1 && + VT.isScalarInteger() && N0.getOpcode() == ISD::SETCC && + N0.getOperand(0).getValueType() == MVT::v4i32 && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) && + cast(N0.getOperand(2))->get() == ISD::SETLT) { + SDValue N00 = N0.getOperand(0); + // Only do this if we can avoid scalarizing the input. + if (ISD::isNormalLoad(N00.getNode()) || + (N00.getOpcode() == ISD::BITCAST && + N00.getOperand(0).getValueType() == MVT::v4f32)) { + SDValue V = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, + DAG.getBitcast(MVT::v4f32, N00)); + return DAG.getZExtOrTrunc(V, dl, VT); + } + } + // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer // type, widen both sides to avoid a trip through memory. if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && @@ -42517,7 +42535,8 @@ } static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Src = N->getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = N->getSimpleValueType(0); @@ -42538,7 +42557,7 @@ // Look through int->fp bitcasts that don't change the element width. unsigned EltWidth = SrcVT.getScalarSizeInBits(); - if (Src.getOpcode() == ISD::BITCAST && + if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST && Src.getOperand(0).getScalarValueSizeInBits() == EltWidth) return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0)); @@ -44519,7 +44538,7 @@ case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); - case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI); + case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget); case X86ISD::MGATHER: case X86ISD::MSCATTER: case ISD::MGATHER: