Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -501,16 +501,22 @@
 
   if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
       isa<ConstantDataSequential>(C)) {
-    uint64_t NumElts;
+    uint64_t NumElts, EltSize;
     Type *EltTy;
     if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
       NumElts = AT->getNumElements();
       EltTy = AT->getElementType();
+      EltSize = DL.getTypeAllocSize(EltTy);
     } else {
       NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
       EltTy = cast<FixedVectorType>(C->getType())->getElementType();
+      // TODO: Handle vectors of non-byte-sized elements. The code below assumes
+      // each element is padded to the next byte boundary, so bail out for now.
+      if (!DL.typeSizeEqualsStoreSize(EltTy))
+        return false;
+
+      EltSize = DL.getTypeStoreSize(EltTy);
     }
-    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
     uint64_t Index = ByteOffset / EltSize;
     uint64_t Offset = ByteOffset - Index * EltSize;
 
Index: llvm/test/Transforms/InstCombine/load-gep-overalign.ll
===================================================================
--- llvm/test/Transforms/InstCombine/load-gep-overalign.ll
+++ llvm/test/Transforms/InstCombine/load-gep-overalign.ll
@@ -11,10 +11,6 @@
 ; Access and report each individual byte in @foo.
 ; OVERALIGNED and NATURAL should have the same result, because the layout of vectors ignores
 ; element type alignment, and thus the representation of @foo is the same in both cases.
-;
-; TODO: The OVERALIGNED result is incorrect, as apparently padding bytes
-; are assumed as they would appear in an array. In vectors, there is no padding.
-;
 ; NATURAL-LABEL: @test_vector_load_i8(
 ; NATURAL-NEXT:    call void @report(i64 0, i8 1)
 ; NATURAL-NEXT:    call void @report(i64 1, i8 35)
@@ -29,12 +25,12 @@
 ; OVERALIGNED-LABEL: @test_vector_load_i8(
 ; OVERALIGNED-NEXT:    call void @report(i64 0, i8 1)
 ; OVERALIGNED-NEXT:    call void @report(i64 1, i8 35)
-; OVERALIGNED-NEXT:    call void @report(i64 2, i8 0)
-; OVERALIGNED-NEXT:    call void @report(i64 3, i8 0)
-; OVERALIGNED-NEXT:    call void @report(i64 4, i8 69)
-; OVERALIGNED-NEXT:    call void @report(i64 5, i8 103)
-; OVERALIGNED-NEXT:    call void @report(i64 6, i8 0)
-; OVERALIGNED-NEXT:    call void @report(i64 7, i8 0)
+; OVERALIGNED-NEXT:    call void @report(i64 2, i8 69)
+; OVERALIGNED-NEXT:    call void @report(i64 3, i8 103)
+; OVERALIGNED-NEXT:    call void @report(i64 4, i8 -119)
+; OVERALIGNED-NEXT:    call void @report(i64 5, i8 -85)
+; OVERALIGNED-NEXT:    call void @report(i64 6, i8 -51)
+; OVERALIGNED-NEXT:    call void @report(i64 7, i8 -17)
 ; OVERALIGNED-NEXT:    ret void
 ;
   %ptr0 = getelementptr i8, ptr @foo, i64 0
Index: llvm/test/Transforms/InstCombine/load-non-byte-sized-vector.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/load-non-byte-sized-vector.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=instcombine -S %s | FileCheck %s
+
+@foo = constant <4 x i4> <i4 u0x1, i4 u0x2, i4 u0x3, i4 u0x4>, align 8
+declare void @report(i64 %index, i4 %val)
+
+define void @test_vector_load_i4_non_byte_sized() {
+; TODO: Loads from vectors of non-byte-sized elements are not folded yet: the
+; folding code would assume each element is padded to the next byte boundary.
+; CHECK-LABEL: define void @test_vector_load_i4_non_byte_sized() {
+; CHECK-NEXT:    [[RES0:%.*]] = load i4, ptr @foo, align 8
+; CHECK-NEXT:    call void @report(i64 0, i4 [[RES0]])
+; CHECK-NEXT:    [[RES1:%.*]] = load i4, ptr getelementptr (i8, ptr @foo, i64 1), align 1
+; CHECK-NEXT:    call void @report(i64 1, i4 [[RES1]])
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr @foo, i64 0
+  %res0 = load i4, ptr %ptr0, align 1
+  call void @report(i64 0, i4 %res0)
+
+  %ptr1 = getelementptr i8, ptr @foo, i64 1
+  %res1 = load i4, ptr %ptr1, align 1
+  call void @report(i64 1, i4 %res1)
+
+  ret void
+}