diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3627,11 +3627,10 @@ In general vector elements are laid out in memory in the same way as :ref:`array types <t_array>`. Such an analogy works fine as long as the vector -elements are byte sized. However, when the elements of the vector aren't byte -sized it gets a bit more complicated. One way to describe the layout is by -describing what happens when a vector such as <N x iM> is bitcasted to an -integer type with N*M bits, and then following the rules for storing such an -integer to memory. +elements are byte sized and naturally aligned. Otherwise, it gets a bit more +complicated. One way to describe the layout is by describing what happens +when a vector such as <N x iM> is bitcasted to an integer type with N*M bits, +and then following the rules for storing such an integer to memory. A bitcast from a vector type to a scalar integer type will see the elements being packed together (without padding). The order in which elements are diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h --- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h @@ -31,7 +31,7 @@ class generic_gep_type_iterator { ItTy OpIt; - PointerUnion<StructType *, Type *> CurTy; + PointerUnion<StructType *, VectorType *, Type *> CurTy; generic_gep_type_iterator() = default; @@ -70,7 +70,9 @@ Type *getIndexedType() const { if (auto *T = CurTy.dyn_cast<Type *>()) return T; - return CurTy.get<StructType *>()->getTypeAtIndex(getOperand()); + if (auto *STy = CurTy.dyn_cast<StructType *>()) + return STy->getTypeAtIndex(getOperand()); + return CurTy.get<VectorType *>()->getElementType(); } Value *getOperand() const { return const_cast<Value *>(&**OpIt); } @@ -80,7 +82,7 @@ if (auto *ATy = dyn_cast<ArrayType>(Ty)) CurTy = ATy->getElementType(); else if (auto *VTy = dyn_cast<VectorType>(Ty)) - CurTy = VTy->getElementType(); + CurTy = VTy; else CurTy = dyn_cast<StructType>(Ty); ++OpIt; @@ -109,7 +111,8 @@ // that. 
bool isStruct() const { return CurTy.is<StructType *>(); } - bool isSequential() const { return CurTy.is<Type *>(); } + bool isVector() const { return CurTy.is<VectorType *>(); } + bool isSequential() const { return !isStruct(); } StructType *getStructType() const { return CurTy.get<StructType *>(); } diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -387,6 +387,17 @@ (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds); } + // Get the size of the indexed element in its containing outer type. + // + // If OuterType is a VectorType, the unpadded element size is returned, + // which must be byte-aligned. + // Otherwise (if OuterType is a StructType or ArrayType), the indexed + // element's AllocSize is returned. + // + // Useful to compute byte-based offsets of elements within the outer type. + static TypeSize getElementSize(const DataLayout &DL, Type *ElementTy, + bool OuterIsVector); + public: /// Test whether this is an inbounds GEP, as defined by LangRef.html. bool isInBounds() const { diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -56,6 +56,16 @@ return cast<GEPOperator>(this)->getResultElementType(); } +TypeSize GEPOperator::getElementSize(const DataLayout &DL, Type *ElementTy, + bool OuterIsVector) { + if (!OuterIsVector) + return DL.getTypeAllocSize(ElementTy); + + auto BitSize = DL.getTypeSizeInBits(ElementTy); + assert(BitSize % 8 == 0 && "GEP element size must be byte-aligned!"); + return {BitSize / 8, BitSize.isScalable()}; +} + Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const { /// compute the worse possible offset for every level of the GEP et accumulate /// the minimum alignment into Result. 
@@ -76,7 +86,8 @@ int64_t ElemCount = 1; if (OpC) ElemCount = OpC->getZExtValue(); - Offset = DL.getTypeAllocSize(GTI.getIndexedType()) * ElemCount; + Offset = + getElementSize(DL, GTI.getIndexedType(), GTI.isVector()) * ElemCount; } Result = Align(MinAlign(Offset, Result.value())); } @@ -147,8 +158,9 @@ return false; continue; } - if (!AccumulateOffset(ConstOffset->getValue(), - DL.getTypeAllocSize(GTI.getIndexedType()))) + if (!AccumulateOffset( + ConstOffset->getValue(), + getElementSize(DL, GTI.getIndexedType(), GTI.isVector()))) return false; continue; } @@ -161,8 +173,9 @@ if (!ExternalAnalysis(*V, AnalysisIndex)) return false; UsedExternalAnalysis = true; - if (!AccumulateOffset(AnalysisIndex, - DL.getTypeAllocSize(GTI.getIndexedType()))) + if (!AccumulateOffset( + AnalysisIndex, + getElementSize(DL, GTI.getIndexedType(), GTI.isVector()))) return false; } return true; @@ -208,15 +221,16 @@ 1); continue; } - CollectConstantOffset(ConstOffset->getValue(), - DL.getTypeAllocSize(GTI.getIndexedType())); + CollectConstantOffset( + ConstOffset->getValue(), + getElementSize(DL, GTI.getIndexedType(), GTI.isVector())); continue; } if (STy || ScalableType) return false; - APInt IndexedSize = - APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType())); + APInt IndexedSize = APInt( + BitWidth, getElementSize(DL, GTI.getIndexedType(), GTI.isVector())); // Insert an initial offset of 0 for V iff none exists already, then // increment the offset by IndexedSize. 
if (!IndexedSize.isZero()) { diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1508,6 +1508,9 @@ Value *Ptr, APInt Offset, Type *TargetTy, SmallVectorImpl<Value *> &Indices, const Twine &NamePrefix) { +#ifndef NDEBUG + APInt OrigOffset = Offset; +#endif PointerType *Ty = cast<PointerType>(Ptr->getType()); // Don't consider any GEPs through an i8* as natural unless the TargetTy is @@ -1525,8 +1528,20 @@ for (const APInt &Index : IntIndices) Indices.push_back(IRB.getInt(Index)); - return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices, - NamePrefix); + Value *Result = getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, + Indices, NamePrefix); +#ifndef NDEBUG + auto *GEP = dyn_cast<GEPOperator>(Result); + if (GEP && GEP->getPointerOperand() == Ptr) { + APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); + assert(GEP->accumulateConstantOffset(DL, GEPOffset) && + "Expected GEP with constant offset!"); + assert(APInt::isSameValue(GEPOffset, OrigOffset) && + "GEP has incorrect offset!"); + } +#endif + + return Result; } /// Compute an adjusted pointer from Ptr by Offset bytes where the diff --git a/llvm/test/Transforms/SROA/overaligned-datalayout.ll b/llvm/test/Transforms/SROA/overaligned-datalayout.ll --- a/llvm/test/Transforms/SROA/overaligned-datalayout.ll +++ b/llvm/test/Transforms/SROA/overaligned-datalayout.ll @@ -36,7 +36,12 @@ %VecStruct = type { <4 x i16> } define i8 @test_vector_bitcast_i8() { ; OVERALIGNED-LABEL: @test_vector_bitcast_i8( -; OVERALIGNED-NEXT: ret i8 poison +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca <4 x i16>, align 8 +; OVERALIGNED-NEXT: store <4 x i16> <i16 0, i16 1, i16 2, i16 3>, <4 x i16>* [[ALLOCA_SROA_0]], align 8 +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_IDX1:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[ALLOCA_SROA_0]], i64 0, i64 3 +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_CAST2:%.*]] = bitcast 
i16* [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_IDX1]] to i8* +; OVERALIGNED-NEXT: [[ALLOCA_SROA_0_6_ALLOCA_SROA_0_6_RES:%.*]] = load i8, i8* [[ALLOCA_SROA_0_6_I8PTROFFSET_SROA_CAST2]], align 2 +; OVERALIGNED-NEXT: ret i8 [[ALLOCA_SROA_0_6_ALLOCA_SROA_0_6_RES]] ; ; NATURAL-LABEL: @test_vector_bitcast_i8( ; NATURAL-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca <4 x i16>, align 8 diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -556,6 +556,65 @@ delete PtrVecB; } +TEST(InstructionsTest, GepOffsets) { + // Test byte-based offsets of GEPs into vectors and arrays, + // including the case of overaligned element types. + LLVMContext C; + DataLayout DefaultDL( + "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3" + "2:32:32-f64:64:64-v64:64:64-v128:128:128-a:0:64-s:64:64-f80" + ":128:128-n8:16:32:64-S128"); + DataLayout AlignMin32DL( + "e-p:64:64:64-i1:8:8-i8:8:8-i16:32:32-i32:32:32-i64:64:64-f3" + "2:32:32-f64:64:64-v64:64:64-v128:128:128-a:0:64-s:64:64-f80" + ":128:128-n8:16:32:64-S128"); + DataLayout AlignMin64DL( + "e-p:64:64:64-i1:8:8-i8:8:8-i16:64:64-i32:64:64-i64:64:64-f3" + "2:64:64-f64:64:64-v64:64:64-v128:128:128-a:0:64-s:64:64-f80" + ":128:128-n8:16:32:64-S128"); + + for (uint64_t ElemBitWidth : {8, 16, 24, 32, 64}) { + IntegerType *ElemTy = IntegerType::get(C, ElemBitWidth); + EXPECT_EQ(DefaultDL.getTypeSizeInBits(ElemTy), ElemBitWidth); + + { + // Check GEP into vector + VectorType *VecTy = FixedVectorType::get(ElemTy, 8); + Constant *VectorNullPtr = Constant::getNullValue(VecTy->getPointerTo()); + std::unique_ptr<GetElementPtrInst> Gep(GetElementPtrInst::Create( + VecTy, VectorNullPtr, + {ConstantInt::get(Type::getInt32Ty(C), 0), + ConstantInt::get(Type::getInt32Ty(C), 1)})); + + for (const DataLayout *DL : {&DefaultDL, &AlignMin32DL, &AlignMin64DL}) { + EXPECT_EQ(DL->getTypeSizeInBits(VecTy), ElemBitWidth * 8); + APInt 
GEPOffset(DL->getIndexTypeSizeInBits(Gep->getType()), 0); + EXPECT_TRUE(Gep->accumulateConstantOffset(*DL, GEPOffset)); + EXPECT_EQ(DL->getTypeSizeInBits(ElemTy), ElemBitWidth); + EXPECT_EQ(GEPOffset.getZExtValue(), ElemBitWidth / 8); + } + } + { + // Check GEP into array + ArrayType *ArrTy = ArrayType::get(ElemTy, 8); + Constant *ArrayNullPtr = Constant::getNullValue(ArrTy->getPointerTo()); + std::unique_ptr<GetElementPtrInst> Gep(GetElementPtrInst::Create( + ArrTy, ArrayNullPtr, + {ConstantInt::get(Type::getInt32Ty(C), 0), + ConstantInt::get(Type::getInt32Ty(C), 1)})); + + for (const DataLayout *DL : {&DefaultDL, &AlignMin32DL, &AlignMin64DL}) { + EXPECT_EQ(DL->getTypeSizeInBits(ArrTy), + DL->getTypeAllocSizeInBits(ElemTy) * 8); + APInt GEPOffset(DL->getIndexTypeSizeInBits(Gep->getType()), 0); + EXPECT_TRUE(Gep->accumulateConstantOffset(*DL, GEPOffset)); + EXPECT_GE(DL->getTypeAllocSizeInBits(ElemTy), ElemBitWidth); + EXPECT_EQ(GEPOffset.getZExtValue(), DL->getTypeAllocSize(ElemTy)); + } + } + } +} + TEST(InstructionsTest, FPMathOperator) { LLVMContext Context; IRBuilder<> Builder(Context);