Index: X86ISelLowering.cpp =================================================================== --- X86ISelLowering.cpp +++ X86ISelLowering.cpp @@ -28724,12 +28724,21 @@ // partial register update stalls, this should be transformed into a MOVSD // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD. + if (VT == MVT::v2f64) if (auto *Mask = dyn_cast(N->getOperand(2))) + { + // Architectures after P6 do not suffer from partial register stalls. + // Remember that architectures after P6 also support SSE2. + if (Mask->getZExtValue() == 1 && !isShuffleFoldableLoad(V0) && Subtarget.hasSSE2()) { + return DAG.getNode(X86ISD::MOVSD, DL, VT, V0, V1); + } + if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) { SDValue NewMask = DAG.getConstant(1, DL, MVT::i8); return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask); } + } return SDValue(); }