Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -16402,6 +16402,29 @@
   SDValue R = Op.getOperand(0);
   SDValue Amt = Op.getOperand(1);
 
+  // With SSE2, lower v8i16/v4i32/v2i64 shifts straight to the X86 vector
+  // shift nodes. v2i64 SRA is excluded and falls through to the code below
+  // (the cost table still treats it as scalarized).
+  // NOTE(review): the SSE2 PSLL*/PSRL*/PSRA* register forms presumably take a
+  // single shift count from the low 64 bits of the count operand, not one
+  // count per element -- confirm that passing Amt through unchanged is valid.
+  if (Subtarget->hasSSE2() &&
+      (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64) &&
+      (VT != MVT::v2i64 || Op.getOpcode() != ISD::SRA)) {
+    assert((VT == R.getSimpleValueType() && VT == Amt.getSimpleValueType()) &&
+           "Unexpected operand type");
+    switch (Op.getOpcode()) {
+    default:
+      llvm_unreachable("Unknown shift opcode!");
+    case ISD::SHL:
+      return DAG.getNode(X86ISD::VSHL, dl, VT, R, Amt);
+    case ISD::SRL:
+      return DAG.getNode(X86ISD::VSRL, dl, VT, R, Amt);
+    case ISD::SRA:
+      return DAG.getNode(X86ISD::VSRA, dl, VT, R, Amt);
+    }
+  }
+
   if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
       VT == MVT::v4i32 || VT == MVT::v8i16 ||
       (Subtarget->hasInt256() &&
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -248,19 +248,19 @@
     // used for vectorization and we don't want to make vectorized code worse
     // than scalar code.
     { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
-    { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
-    { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
-    { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
+    { ISD::SHL, MVT::v8i16, 1 }, // Lowered to X86ISD::VSHL.
+    { ISD::SHL, MVT::v4i32, 1 }, // Lowered to X86ISD::VSHL.
+    { ISD::SHL, MVT::v2i64, 1 }, // Lowered to X86ISD::VSHL.
     { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
 
     { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
-    { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
-    { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
-    { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+    { ISD::SRL, MVT::v8i16, 1 }, // Lowered to X86ISD::VSRL.
+    { ISD::SRL, MVT::v4i32, 1 }, // Lowered to X86ISD::VSRL.
+    { ISD::SRL, MVT::v2i64, 1 }, // Lowered to X86ISD::VSRL.
 
     { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
-    { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
-    { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
+    { ISD::SRA, MVT::v8i16, 1 }, // Lowered to X86ISD::VSRA.
+    { ISD::SRA, MVT::v4i32, 1 }, // Lowered to X86ISD::VSRA.
     { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
 
     // It is not a good idea to vectorize division. We have to scalarize it and