Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1082,6 +1082,37 @@ return Shuffle; } +namespace { +Value *getSplatValue(Value *V) { + // insertelement undef, X, Y - all elements are undef or X, so we can treat + // this like a splat of X. Note: This is the form simplify demanded elements + // tends to produce. + if (auto *IE = dyn_cast(V)) + if (isa(IE->getOperand(0))) + return IE->getOperand(1); + + // TODO: recognize a shuffle broadcast too + + auto *CV = dyn_cast(V); + if (!CV) + return nullptr; + if (auto *SplatC = CV->getSplatValue()) + return SplatC; + + Constant *Splat = nullptr; + for (unsigned i = 0; i < CV->getNumOperands(); i++) { + Constant *Elt = CV->getOperand(i); + if (isa(Elt)) + continue; + if (!Splat) + Splat = Elt; + if (Elt != Splat) + return nullptr; + } + return Splat; +} +} + /// The specified value produces a vector with any number of elements. /// DemandedElts contains the set of elements that are actually used by the /// caller. This method analyzes which elements of the operand are undef and @@ -1183,6 +1214,50 @@ switch (I->getOpcode()) { default: break; + case Instruction::GetElementPtr: { + // Conservatively track the demanded elements back through any vector + // operands we may have. We know there must be at least one, or we + // wouldn't have a vector result to get here. Note that we intentionally + // merge the undef bits here since gepping with either an undef base or + // index results in undef. + for (unsigned i = 0; i < I->getNumOperands(); i++) + if (I->getOperand(i)->getType()->isVectorTy()) + simplifyAndSetOp(I, i, DemandedElts, UndefElts); + +#if 0 + // If we can do so without changing the result type of the GEP, scalarize + // any operands which are simple vector splats. A couple of notes here: + // 1) If we remove all vector operands, we implicitly change the result + // type of the GEP to a scalar. We can't broadcast the result here, + // and instead leave that portion of the transform to our caller. + // 2) We prefer to leave the last vector operand unchanged if needed to + // ensure at least one argument stays vector typed. This is arbitrary, + // but biases us towards scalarizing pointer operands which seems handy. + // 3) We recognize constant splats w/ undef elements, and + // (insertlement undef, X, Y) since both can be interpreted as + // broadcasting X to all (usable) lanes. Since this is the form we tend + // to produce when simplify operands w/demanded elements, it's nice to + // catch that them ere. + + unsigned LastVectorOperand = I->getNumOperands(); + for (unsigned i = 1; i < I->getNumOperands(); i++) + if (I->getOperand(i)->getType()->isVectorTy()) + LastVectorOperand = i; + assert(LastVectorOperand < I->getNumOperands() && + "must have at least one vector operand"); + + for (unsigned i = 0; i < LastVectorOperand; i++) + if (I->getOperand(i)->getType()->isVectorTy()) + if (auto *SplatC = getSplatValue(I->getOperand(i))) { + I->setOperand(i, SplatC); + MadeChange = true; + } + + // TODO: either move above to caller, or use demanded elements + // (i.e. another use of the vector operand which needs all lanes) +#endif + break; + } case Instruction::InsertElement: { // If this is a variable index, we don't know which element it overwrites. // demand exactly the same input as we produce. Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -292,6 +292,35 @@ if (auto *Phi = dyn_cast(SrcVec)) if (Instruction *ScalarPHI = scalarizePHI(EI, Phi)) return ScalarPHI; + + // If we have an extractelement (gep ...), where we can form a new scalar + // gep without forming extra instructions, prefer the scalar gep in place + // of the extractelement to remove uses from the vector instruction. + if (SrcVec->hasOneUse()) + if (auto *GEP = dyn_cast(SrcVec)) { + unsigned NumVectorOperands = 0; + for (unsigned i = 0; i < GEP->getNumOperands(); i++) + if (GEP->getOperand(i)->getType()->isVectorTy()) + NumVectorOperands++; + + if (NumVectorOperands == 1) { + auto getScalar = [&](Value *V, Value* Index) { + if (!V->getType()->isVectorTy()) + return V; + // Hmm, should we try to match broadcasts or inserts here to avoid + // generating the EE? If so, should we account for this in the + // cost model of the transform? + return Builder.CreateExtractElement(V, Index); + }; + Value *NewPtr = getScalar(GEP->getPointerOperand(), Index); + SmallVector NewIdx; + NewIdx.reserve(GEP->getNumOperands()); + for (unsigned i = 1; i < GEP->getNumOperands(); i++) + NewIdx.push_back(getScalar(GEP->getOperand(i), Index)); + auto *GEP = Builder.CreateGEP(NewPtr, NewIdx, EI.getName()); + return replaceInstUsesWith(EI, GEP); + } + } } BinaryOperator *BO; Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1549,6 +1549,41 @@ return CastInst::Create(CastOpc, NarrowBO, BO.getType()); } +namespace { +Value *getSplatValue(Value *V) { + // TODO: merge both cases below into the existing helper function in + // VectorUtils after careful audit of users to ensure they're compatible. At + // the moment, neither are handled by that routine. + + // insertelement undef, X, Y - all elements are undef or X, so we can treat + // this like a splat of X. Note: This is the form simplify demanded elements + // tends to produce. + if (auto *IE = dyn_cast(V)) + if (isa(IE->getOperand(0))) + return IE->getOperand(1); + + // TODO: recognize a shuffle broadcast too + + auto *CV = dyn_cast(V); + if (!CV) + return nullptr; + if (auto *SplatC = CV->getSplatValue()) + return SplatC; + + Constant *Splat = nullptr; + for (unsigned i = 0; i < CV->getNumOperands(); i++) { + Constant *Elt = CV->getOperand(i); + if (isa(Elt)) + continue; + if (!Splat) + Splat = Elt; + if (Elt != Splat) + return nullptr; + } + return Splat; +} +} + Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); Type *GEPType = GEP.getType(); @@ -1600,6 +1635,39 @@ if (MadeChange) return &GEP; + if (GEPType->isVectorTy()) { + // If we can do so without changing the result type of the GEP, scalarize + // any operands which are simple vector splats. A couple of notes here: + // 1) If we remove all vector operands, we implicitly change the result + // type of the GEP to a scalar. (TODO: Insert the broadcast and do the + // actual conversion to a scalar GEP.) + // 2) We prefer to leave the last vector operand unchanged if needed to + // ensure at least one argument stays vector typed. This is arbitrary, + // but biases us towards scalarizing pointer operands which seems handy. + // 3) We recognize constant splats w/ undef elements, and + // (insertlement undef, X, Y) since both can be interpreted as + // broadcasting X to all (usable) lanes. Since this is the form we tend + // to produce when simplify operands w/demanded elements, it's nice to + // catch that them ere. + + unsigned LastVectorOperand = GEP.getNumOperands(); + for (unsigned i = 0; i < GEP.getNumOperands(); i++) + if (GEP.getOperand(i)->getType()->isVectorTy()) + LastVectorOperand = i; + assert(LastVectorOperand < GEP.getNumOperands() && + "must have at least one vector operand"); + + for (unsigned i = 0; i < LastVectorOperand; i++) + if (GEP.getOperand(i)->getType()->isVectorTy()) + if (auto *SplatC = getSplatValue(GEP.getOperand(i))) { + GEP.setOperand(i, SplatC); + MadeChange = true; + } + + if (MadeChange) + return &GEP; + } + // Check to see if the inputs to the PHI node are getelementptr instructions. if (auto *PN = dyn_cast(PtrOp)) { auto *Op1 = dyn_cast(PN->getOperand(0));