Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1082,6 +1082,37 @@
   return Shuffle;
 }
 
+namespace {
+Value *getSplatValue(Value *V) {
+  // insertelement undef, X, Y - all elements are undef or X, so we can treat
+  // this like a splat of X.  Note: This is the form simplify demanded elements
+  // tends to produce.
+  if (auto *IE = dyn_cast<InsertElementInst>(V))
+    if (isa<UndefValue>(IE->getOperand(0)))
+      return IE->getOperand(1);
+
+  // TODO: recognize a shuffle broadcast too
+
+  auto *CV = dyn_cast<ConstantVector>(V);
+  if (!CV)
+    return nullptr;
+  if (auto *SplatC = CV->getSplatValue())
+    return SplatC;
+  
+  Constant *Splat = nullptr;
+  for (unsigned i = 0; i < CV->getNumOperands(); i++) {
+    Constant *Elt = CV->getOperand(i);
+    if (isa<UndefValue>(Elt))
+      continue;
+    if (!Splat)
+      Splat = Elt;
+    if (Elt != Splat)
+      return nullptr;
+  }
+  return Splat;
+}
+}
+
 /// The specified value produces a vector with any number of elements.
 /// DemandedElts contains the set of elements that are actually used by the
 /// caller. This method analyzes which elements of the operand are undef and
@@ -1183,6 +1214,50 @@
   switch (I->getOpcode()) {
   default: break;
 
+  case Instruction::GetElementPtr: {
+    // Conservatively track the demanded elements back through any vector
+    // operands we may have.  We know there must be at least one, or we
+    // wouldn't have a vector result to get here. Note that we intentionally
+    // merge the undef bits here since gepping with either an undef base or
+    // index results in undef. 
+    for (unsigned i = 0; i < I->getNumOperands(); i++)
+      if (I->getOperand(i)->getType()->isVectorTy())
+        simplifyAndSetOp(I, i, DemandedElts, UndefElts);
+
+#if 0
+    // If we can do so without changing the result type of the GEP, scalarize
+    // any operands which are simple vector splats.  A couple of notes here:
+    // 1) If we remove all vector operands, we implicitly change the result
+    //    type of the GEP to a scalar.  We can't broadcast the result here,
+    //    and instead leave that portion of the transform to our caller. 
+    // 2) We prefer to leave the last vector operand unchanged if needed to
+    //    ensure at least one argument stays vector typed.  This is arbitrary,
+    //    but biases us towards scalarizing pointer operands which seems handy.
+    // 3) We recognize constant splats w/ undef elements, and
+    //    (insertlement undef, X, Y) since both can be interpreted as
+    //    broadcasting X to all (usable) lanes. Since this is the form we tend
+    //    to produce when simplify operands w/demanded elements, it's nice to
+    //    catch that them ere.
+    
+    unsigned LastVectorOperand = I->getNumOperands();
+    for (unsigned i = 1; i < I->getNumOperands(); i++)
+      if (I->getOperand(i)->getType()->isVectorTy())
+        LastVectorOperand = i;
+    assert(LastVectorOperand < I->getNumOperands() &&
+           "must have at least one vector operand");
+    
+    for (unsigned i = 0; i < LastVectorOperand; i++)
+      if (I->getOperand(i)->getType()->isVectorTy())
+        if (auto *SplatC = getSplatValue(I->getOperand(i))) {
+          I->setOperand(i, SplatC);
+          MadeChange = true;
+        }
+
+    // TODO: either move above to caller, or use demanded elements
+    // (i.e. another use of the vector operand which needs all lanes)
+#endif    
+    break;
+  }
   case Instruction::InsertElement: {
     // If this is a variable index, we don't know which element it overwrites.
     // demand exactly the same input as we produce.
Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -292,6 +292,35 @@
     if (auto *Phi = dyn_cast<PHINode>(SrcVec))
       if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
         return ScalarPHI;
+
+    // If we have an extractelement (gep ...), where we can form a new scalar
+    // gep without forming extra instructions, prefer the scalar gep in place
+    // of the extractelement to remove uses from the vector instruction.
+    if (SrcVec->hasOneUse())
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcVec)) {
+        unsigned NumVectorOperands = 0;
+        for (unsigned i = 0; i < GEP->getNumOperands(); i++)
+          if (GEP->getOperand(i)->getType()->isVectorTy())
+            NumVectorOperands++;
+
+        if (NumVectorOperands == 1) {
+          auto getScalar = [&](Value *V, Value* Index) {
+            if (!V->getType()->isVectorTy())
+              return V;
+            // Hmm, should we try to match broadcasts or inserts here to avoid
+            // generating the EE?  If so, should we account for this in the
+            // cost model of the transform?
+            return Builder.CreateExtractElement(V, Index);
+          };
+          Value *NewPtr = getScalar(GEP->getPointerOperand(), Index);
+          SmallVector<Value*, 4> NewIdx;
+          NewIdx.reserve(GEP->getNumOperands());
+          for (unsigned i = 1; i < GEP->getNumOperands(); i++)
+            NewIdx.push_back(getScalar(GEP->getOperand(i), Index));
+          auto *GEP = Builder.CreateGEP(NewPtr, NewIdx, EI.getName());
+          return replaceInstUsesWith(EI, GEP);
+        }
+      }
   }
 
   BinaryOperator *BO;
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1549,6 +1549,41 @@
   return CastInst::Create(CastOpc, NarrowBO, BO.getType());
 }
 
+namespace {
+Value *getSplatValue(Value *V) {
+  // TODO: merge both cases below into the existing helper function in
+  // VectorUtils after careful audit of users to ensure they're compatible.  At
+  // the moment, neither are handled by that routine.
+
+  // insertelement undef, X, Y - all elements are undef or X, so we can treat
+  // this like a splat of X.  Note: This is the form simplify demanded elements
+  // tends to produce.
+  if (auto *IE = dyn_cast<InsertElementInst>(V))
+    if (isa<UndefValue>(IE->getOperand(0)))
+      return IE->getOperand(1);
+
+  // TODO: recognize a shuffle broadcast too
+
+  auto *CV = dyn_cast<ConstantVector>(V);
+  if (!CV)
+    return nullptr;
+  if (auto *SplatC = CV->getSplatValue())
+    return SplatC;
+  
+  Constant *Splat = nullptr;
+  for (unsigned i = 0; i < CV->getNumOperands(); i++) {
+    Constant *Elt = CV->getOperand(i);
+    if (isa<UndefValue>(Elt))
+      continue;
+    if (!Splat)
+      Splat = Elt;
+    if (Elt != Splat)
+      return nullptr;
+  }
+  return Splat;
+}
+}
+
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
   Type *GEPType = GEP.getType();
@@ -1600,6 +1635,39 @@
   if (MadeChange)
     return &GEP;
 
+  if (GEPType->isVectorTy()) {
+    // If we can do so without changing the result type of the GEP, scalarize
+    // any operands which are simple vector splats.  A couple of notes here:
+    // 1) If we remove all vector operands, we implicitly change the result
+    //    type of the GEP to a scalar. (TODO: Insert the broadcast and do the
+    //    actual conversion to a scalar GEP.)
+    // 2) We prefer to leave the last vector operand unchanged if needed to
+    //    ensure at least one argument stays vector typed.  This is arbitrary,
+    //    but biases us towards scalarizing pointer operands which seems handy.
+    // 3) We recognize constant splats w/ undef elements, and
+    //    (insertlement undef, X, Y) since both can be interpreted as
+    //    broadcasting X to all (usable) lanes. Since this is the form we tend
+    //    to produce when simplify operands w/demanded elements, it's nice to
+    //    catch that them ere.
+    
+    unsigned LastVectorOperand = GEP.getNumOperands();
+    for (unsigned i = 0; i < GEP.getNumOperands(); i++)
+      if (GEP.getOperand(i)->getType()->isVectorTy())
+        LastVectorOperand = i;
+    assert(LastVectorOperand < GEP.getNumOperands() &&
+           "must have at least one vector operand");
+    
+    for (unsigned i = 0; i < LastVectorOperand; i++)
+      if (GEP.getOperand(i)->getType()->isVectorTy())
+        if (auto *SplatC = getSplatValue(GEP.getOperand(i))) {
+          GEP.setOperand(i, SplatC);
+          MadeChange = true;
+        }
+
+    if (MadeChange)
+      return &GEP;
+  }
+
   // Check to see if the inputs to the PHI node are getelementptr instructions.
   if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
     auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));