Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18780,6 +18780,43 @@ unsigned InsIdx = cast(N2)->getZExtValue(); + // Push subvector bitcasts to the output, adjusting the index as we go. + // insert_subvector(bitcast(v), bitcast(s), c1) + // -> bitcast(insert_subvector(v, s, c2)) + if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) && + N1.getOpcode() == ISD::BITCAST) { + SDValue N0Src = peekThroughBitcasts(N0); + SDValue N1Src = peekThroughBitcasts(N1); + EVT N0SrcSVT = N0Src.getValueType().getScalarType(); + EVT N1SrcSVT = N1Src.getValueType().getScalarType(); + if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) && + N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) { + EVT NewVT; + SDLoc DL(N); + SDValue NewIdx; + MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + LLVMContext &Ctx = *DAG.getContext(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) { + unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits(); + NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale); + NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT); + } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) { + unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits; + if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) { + NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale); + NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT); + } + } + if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) { + SDValue Res = DAG.getBitcast(NewVT, N0Src); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx); + return DAG.getBitcast(VT, Res); + } + } + } + // Canonicalize insert_subvector dag nodes. // Example: // (insert_subvector (insert_subvector A, Idx0), Idx1) Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -43066,42 +43066,6 @@ } } - // Push subvector bitcasts to the output, adjusting the index as we go. - // insert_subvector(bitcast(v), bitcast(s), c1) -> - // bitcast(insert_subvector(v,s,c2)) - // TODO: Move this to generic - which only supports same scalar sizes. - if ((Vec.isUndef() || Vec.getOpcode() == ISD::BITCAST) && - SubVec.getOpcode() == ISD::BITCAST) { - SDValue VecSrc = peekThroughBitcasts(Vec); - SDValue SubVecSrc = peekThroughBitcasts(SubVec); - MVT VecSrcSVT = VecSrc.getSimpleValueType().getScalarType(); - MVT SubVecSrcSVT = SubVecSrc.getSimpleValueType().getScalarType(); - if (Vec.isUndef() || VecSrcSVT == SubVecSrcSVT) { - MVT NewOpVT; - SDValue NewIdx; - unsigned NumElts = OpVT.getVectorNumElements(); - unsigned EltSizeInBits = OpVT.getScalarSizeInBits(); - if ((EltSizeInBits % SubVecSrcSVT.getSizeInBits()) == 0) { - unsigned Scale = EltSizeInBits / SubVecSrcSVT.getSizeInBits(); - NewOpVT = MVT::getVectorVT(SubVecSrcSVT, NumElts * Scale); - NewIdx = DAG.getIntPtrConstant(IdxVal * Scale, dl); - } else if ((SubVecSrcSVT.getSizeInBits() % EltSizeInBits) == 0) { - unsigned Scale = SubVecSrcSVT.getSizeInBits() / EltSizeInBits; - if ((IdxVal % Scale) == 0) { - NewOpVT = MVT::getVectorVT(SubVecSrcSVT, NumElts / Scale); - NewIdx = DAG.getIntPtrConstant(IdxVal / Scale, dl); - } - } - if (NewIdx && DAG.getTargetLoweringInfo().isOperationLegal( - ISD::INSERT_SUBVECTOR, NewOpVT)) { - SDValue Res = DAG.getBitcast(NewOpVT, VecSrc); - Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewOpVT, Res, SubVecSrc, - NewIdx); - return DAG.getBitcast(OpVT, Res); - } - } - } - // Match concat_vector style patterns. SmallVector SubVectorOps; if (collectConcatOps(N, SubVectorOps))