Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -837,10 +837,12 @@
     if (!PointeeType)
       PointeeType =
           cast<PointerType>(Ptr->getType()->getScalarType())->getElementType();
+    else if (PointeeType->isVectorTy() == Ptr->getType()->isVectorTy())
+      assert(PointeeType ==
+          cast<PointerType>(Ptr->getType()->getScalarType())->getElementType()->getScalarType());
     else
-      assert(
-          PointeeType ==
-          cast<PointerType>(Ptr->getType()->getScalarType())->getElementType());
+      assert(PointeeType->getScalarType() ==
+          cast<PointerType>(Ptr->getType()->getScalarType())->getElementType()->getScalarType());
     return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values,
                                           NameStr, InsertBefore);
   }
@@ -852,10 +854,12 @@
     if (!PointeeType)
       PointeeType =
           cast<PointerType>(Ptr->getType()->getScalarType())->getElementType();
+    else if (PointeeType->isVectorTy() == Ptr->getType()->isVectorTy())
+      assert(PointeeType ==
+          cast<PointerType>(Ptr->getType()->getScalarType())->getElementType()->getScalarType());
     else
-      assert(
-          PointeeType ==
-          cast<PointerType>(Ptr->getType()->getScalarType())->getElementType());
+      assert(PointeeType->getScalarType() ==
+          cast<PointerType>(Ptr->getType()->getScalarType())->getElementType()->getScalarType());
     return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values,
                                           NameStr, InsertAtEnd);
   }
@@ -971,7 +975,12 @@
       unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
       return VectorType::get(PtrTy, NumElem);
     }
-
+    // A scalar base combined with a vector index also produces a vector of
+    // pointers. All vector indices share one width (the verifier enforces
+    // this), so the first vector index found determines the result width.
+    for (Value *Index : IdxList)
+      if (auto *IndexVTy = dyn_cast<VectorType>(Index->getType()))
+        return VectorType::get(PtrTy, IndexVTy->getNumElements());
     // Scalar GEP
     return PtrTy;
   }
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -5541,6 +5541,11 @@
 
   SmallVector<Value*, 16> Indices;
   bool AteExtraComma = false;
+  // A GEP returns a vector of pointers if at least one of its operands is a
+  // vector. All vector operands must have the same vector width.
+  unsigned GepWidth = BaseType->isVectorTy() ?
+    cast<VectorType>(BaseType)->getNumElements() : 0;
+
   while (EatIfPresent(lltok::comma)) {
     if (Lex.getKind() == lltok::MetadataVar) {
       AteExtraComma = true;
@@ -5549,14 +5554,13 @@
     if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
     if (!Val->getType()->getScalarType()->isIntegerTy())
       return Error(EltLoc, "getelementptr index must be an integer");
-    if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
-      return Error(EltLoc, "getelementptr index type missmatch");
+
     if (Val->getType()->isVectorTy()) {
       unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
-      unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
-      if (ValNumEl != PtrNumEl)
+      if (GepWidth && GepWidth != ValNumEl)
         return Error(EltLoc,
           "getelementptr vector index has a wrong number of elements");
+      GepWidth = ValNumEl;
     }
     Indices.push_back(Val);
   }
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2760,6 +2760,16 @@
   SDValue N = getValue(Op0);
   SDLoc dl = getCurSDLoc();
 
+  // Normalize a vector GEP: every scalar operand is converted to a splat
+  // vector of the result width.
+  unsigned VectorWidth =
+   I.getType()->isVectorTy()? cast<VectorType>(I.getType())->getVectorNumElements() : 0;
+
+  if (VectorWidth && !N.getValueType().isVector()) {
+    MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth);
+    SmallVector<SDValue, 16> Ops(VectorWidth, N);
+    N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+  }
   for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
        OI != E; ++OI) {
     const Value *Idx = *OI;
@@ -2780,11 +2790,18 @@
       APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
 
       // If this is a constant subscript, handle it quickly.
-      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+      const auto *CI = dyn_cast<ConstantInt>(Idx);
+      if (!CI && isa<ConstantDataVector>(Idx) &&
+          cast<ConstantDataVector>(Idx)->getSplatValue())
+        CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
+
+      if (CI) {
         if (CI->isZero())
           continue;
         APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
-        SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy);
+        SDValue OffsVal = VectorWidth ?
+          DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
+          DAG.getConstant(Offs, dl, PtrTy);
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
         continue;
       }
@@ -2792,6 +2809,11 @@
       // N = N + Idx * ElementSize;
       SDValue IdxN = getValue(Idx);
 
+      if (!IdxN.getValueType().isVector() && VectorWidth) {
+        MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+        SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
+        IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);      
+      }
       // If the index is smaller or larger than intptr_t, truncate or extend
       // it.
       IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
@@ -3062,48 +3084,63 @@
 static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
                            SelectionDAGBuilder* SDB) {
 
+  SelectionDAG& DAG = SDB->DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
   assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
   GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
   if (!Gep || Gep->getNumOperands() > 2)
     return false;
-  ShuffleVectorInst *ShuffleInst = 
-    dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
-  if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
-      cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
-      Instruction::InsertElement)
+  
+  Value *GepBasePtr = Gep->getPointerOperand();
+  Value *IndexVal = Gep->getOperand(1);
+  // The GEP instruction may be located in another basic block; check that an
+  // SDValue can be found for each of its operands.
+  if (!SDB->findValue(GepBasePtr) || !SDB->findValue(IndexVal))
     return false;
 
-  Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
-
-  SelectionDAG& DAG = SDB->DAG;
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  // Check is the Ptr is inside current basic block
-  // If not, look for the shuffle instruction
-  if (SDB->findValue(Ptr))
+  // If GEP base is scalar - this is a single base
+  if (!GepBasePtr->getType()->isVectorTy()) {
+    Ptr = GepBasePtr;
     Base = SDB->getValue(Ptr);
-  else if (SDB->findValue(ShuffleInst)) {
-    SDValue ShuffleNode = SDB->getValue(ShuffleInst);
-    SDLoc sdl = ShuffleNode;
-    Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl,
-                       ShuffleNode.getValueType().getScalarType(), ShuffleNode,
-                       DAG.getConstant(0, sdl, TLI.getVectorIdxTy()));
-    SDB->setValue(Ptr, Base);
   }
-  else
-    return false;
-
-  Value *IndexVal = Gep->getOperand(1);
-  if (SDB->findValue(IndexVal)) {
-    Index = SDB->getValue(IndexVal);
+  else {
+    // Only a zero-mask shuffle fed by an insertelement is recognized here.
+    auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(GepBasePtr);
+    if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
+        !isa<InsertElementInst>(ShuffleInst->getOperand(0)))
+      return false;
 
-    if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
-      IndexVal = Sext->getOperand(0);
-      if (SDB->findValue(IndexVal))
-        Index = SDB->getValue(IndexVal);
+    Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
+    // Check whether Ptr is available in the current basic block.
+    // If not, extract element 0 of the shuffle instruction's value instead.
+    if (SDB->findValue(Ptr))
+      Base = SDB->getValue(Ptr);
+    else {
+      SDValue ShuffleNode = SDB->getValue(ShuffleInst);
+      SDLoc sdl = ShuffleNode;
+      Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl,
+                         ShuffleNode.getValueType().getScalarType(), ShuffleNode,
+                         DAG.getConstant(0, sdl, TLI.getVectorIdxTy()));
+      SDB->setValue(Ptr, Base);
     }
-    return true;
   }
-  return false;
+
+  Index = SDB->getValue(IndexVal);
+
+  if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+    IndexVal = Sext->getOperand(0);
+    if (SDB->findValue(IndexVal))
+      Index = SDB->getValue(IndexVal);
+  }
+  if (!Index.getValueType().isVector()) {
+    unsigned VectorWidth =
+     cast<VectorType>(Gep->getType())->getVectorNumElements();
+    MVT VT = MVT::getVectorVT(Index.getValueType().getSimpleVT(), VectorWidth);
+    SmallVector<SDValue, 16> Ops(VectorWidth, Index);
+    Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);  
+  }
+  return true;
 }
 
 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -2540,10 +2540,6 @@
   Assert(isa<PointerType>(TargetTy),
          "GEP base pointer is not a vector or a vector of pointers", &GEP);
   Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
-  Assert(GEP.getPointerOperandType()->isVectorTy() ==
-             GEP.getType()->isVectorTy(),
-         "Vector GEP must return a vector value", &GEP);
-
   SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
   Type *ElTy =
       GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
@@ -2553,17 +2549,18 @@
              GEP.getResultElementType() == ElTy,
          "GEP is not of right type for indices!", &GEP, ElTy);
 
-  if (GEP.getPointerOperandType()->isVectorTy()) {
+  if (GEP.getType()->isVectorTy()) {
     // Additional checks for vector GEPs.
-    unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
-    Assert(GepWidth == GEP.getType()->getVectorNumElements(),
-           "Vector GEP result width doesn't match operand's", &GEP);
+    unsigned GepWidth = cast<VectorType>(GEP.getType())->getVectorNumElements();
+    if (GEP.getPointerOperandType()->isVectorTy())
+      Assert(GepWidth == GEP.getPointerOperandType()->getVectorNumElements(),
+             "Vector GEP result width doesn't match operand's", &GEP);
     for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
       Type *IndexTy = Idxs[i]->getType();
-      Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!",
-             &GEP);
-      unsigned IndexWidth = IndexTy->getVectorNumElements();
-      Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+      if (IndexTy->isVectorTy()) {
+        unsigned IndexWidth = cast<VectorType>(IndexTy)->getVectorNumElements();
+        Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+      }
     }
   }
   visitInstruction(GEP);
Index: test/Assembler/getelementptr_vec_idx1.ll
===================================================================
--- test/Assembler/getelementptr_vec_idx1.ll
+++ test/Assembler/getelementptr_vec_idx1.ll
@@ -1,8 +1,8 @@
 ; RUN: not llvm-as < %s >/dev/null 2> %t
 ; RUN: FileCheck %s < %t
-; Test that a vector index is only used with a vector pointer.
+; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers
 
-; CHECK: getelementptr index type missmatch
+; CHECK: '%w' defined with type '<2 x i32*>
 
 define i32 @test(i32* %a) {
   %w = getelementptr i32, i32* %a, <2 x i32> <i32 5, i32 9>
Index: test/Assembler/getelementptr_vec_idx2.ll
===================================================================
--- test/Assembler/getelementptr_vec_idx2.ll
+++ test/Assembler/getelementptr_vec_idx2.ll
@@ -1,10 +1,24 @@
 ; RUN: not llvm-as < %s >/dev/null 2> %t
 ; RUN: FileCheck %s < %t
-; Test that a vector pointer is only used with a vector index.
+; Test that a vector pointer may be used with a scalar index.
+; Test that a vector pointer and vector index should have the same vector width
 
-; CHECK: getelementptr index type missmatch
-
-define <2 x i32> @test(<2 x i32*> %a) {
+; This code is correct
+define <2 x i32*> @test2(<2 x i32*> %a) {
   %w = getelementptr i32, <2 x i32*> %a, i32 2
+  ret <2 x i32*> %w
+}
+
+; This code is correct
+define <2 x i32*> @test3(i32* %a) {
+  %w = getelementptr i32, i32* %a, <2 x i32> <i32 2, i32 2>
+  ret <2 x i32*> %w
+}
+
+; CHECK: getelementptr vector index has a wrong number of elements
+
+define <2 x i32> @test1(<2 x i32*> %a) {
+  %w = getelementptr i32, <2 x i32*> %a, <4 x i32><i32 2, i32 2, i32 2, i32 2>
   ret <2 x i32> %w
 }
+
Index: test/Assembler/getelementptr_vec_idx3.ll
===================================================================
--- test/Assembler/getelementptr_vec_idx3.ll
+++ test/Assembler/getelementptr_vec_idx3.ll
@@ -1,8 +1,8 @@
 ; RUN: not llvm-as < %s >/dev/null 2> %t
 ; RUN: FileCheck %s < %t
-; Test that vector indices have the same number of elements as the pointer.
+; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers
 
-; CHECK: getelementptr index type missmatch
+; CHECK: '%w' defined with type '<2 x <4 x i32>*>'
 
 define <4 x i32> @test(<4 x i32>* %a) {
   %w = getelementptr <4 x i32>, <4 x i32>* %a, <2 x i32> <i32 5, i32 9>
Index: test/CodeGen/X86/masked_gather_scatter.ll
===================================================================
--- test/CodeGen/X86/masked_gather_scatter.ll
+++ test/CodeGen/X86/masked_gather_scatter.ll
@@ -140,3 +140,78 @@
   %res = add <16 x i32> %gt1, %gt2
   ret <16 x i32> %res
 }
+
+%struct.RT = type { i8, [10 x [20 x i32]], i8 }
+%struct.ST = type { i32, double, %struct.RT }
+
+; Test9 and Test10 should give the same result
+
+; KNL-LABEL: test9
+; KNL: vpbroadcastq    %rdi, %zmm
+; KNL: vpmovsxdq
+; KNL: vpbroadcastq
+; KNL: vpmuludq
+; KNL: vpaddq
+; KNL: vpaddq
+; KNL: vpaddq
+; KNL: vpaddq
+; KNL: vpgatherqd      (,%zmm
+
+define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
+entry:
+  %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
+  %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
+
+  %arrayidx = getelementptr  %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13>
+  %res = call <8 x i32 >  @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  ret <8 x i32> %res
+}
+
+; KNL-LABEL: test10
+; KNL: vpbroadcastq    %rdi, %zmm
+; KNL: vpmovsxdq
+; KNL: vpbroadcastq
+; KNL: vpmuludq
+; KNL: vpaddq
+; KNL: vpaddq
+; KNL: vpaddq
+; KNL: vpaddq
+; KNL: vpgatherqd      (,%zmm
+define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
+entry:
+  %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
+  %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
+
+  %arrayidx = getelementptr  %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13
+  %res = call <8 x i32 >  @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  ret <8 x i32> %res
+}
+
+; KNL-LABEL: test11
+; KNL: vpbroadcastd    %esi, %zmm
+; KNL: vgatherdps      (%rdi,%zmm
+
+define <16 x float> @test11(float* %base, i32 %ind) {
+
+  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
+  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+
+  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
+
+  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  ret <16 x float>%res
+}
+
+; KNL-LABEL: test12
+; KNL: vgatherdps      (%rdi,%zmm
+define <16 x float> @test12(float* %base, <16 x i32> %ind) {
+
+  %sext_ind = sext <16 x i32> %ind to <16 x i64>
+  %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
+
+  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  ret <16 x float>%res
+}
+
+
+
Index: test/CodeGen/X86/vector-gep.ll
===================================================================
--- test/CodeGen/X86/vector-gep.ll
+++ test/CodeGen/X86/vector-gep.ll
@@ -92,3 +92,36 @@
 ;CHECK: ret
 }
 
+;CHECK-LABEL: AGEP7:
+define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind {
+entry:
+;CHECK-LABEL: AGEP7
+;CHECK-NOT: pslld
+  %A = getelementptr i8, <4 x i8*> %param, i32 %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
+;CHECK-LABEL: AGEP8:
+define <4 x i8*> @AGEP8(<4 x i8*> %param, i8 %off) nounwind {
+entry:
+;CHECK-LABEL: AGEP8
+;CHECK: vpaddd
+  %A = getelementptr i8, <4 x i8*> %param, i8 %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
+;CHECK-LABEL: AGEP9:
+define <4 x i16*> @AGEP9(i16* %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK-LABEL: AGEP9
+; Multiply offset by two (add it to itself).
+;CHECK: vpadd
+; add the base to the offset
+;CHECK: vbroadcastss
+;CHECK-NEXT: vpadd
+  %A = getelementptr i16, i16* %param, <4 x i32> %off
+  ret <4 x i16*> %A
+;CHECK: ret
+}