Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -501,16 +501,22 @@
 
   if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
       isa<ConstantDataSequential>(C)) {
-    uint64_t NumElts;
+    uint64_t NumElts, EltSize;
     Type *EltTy;
     if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
       NumElts = AT->getNumElements();
       EltTy = AT->getElementType();
+      EltSize = DL.getTypeAllocSize(EltTy);
     } else {
       NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
       EltTy = cast<FixedVectorType>(C->getType())->getElementType();
+      // TODO: Support vectors with non-byte-sized elements. The code below
+      // assumes each element is padded to the next byte boundary, so bail out.
+      if (!DL.typeSizeEqualsStoreSize(EltTy))
+        return false;
+
+      EltSize = DL.getTypeStoreSize(EltTy);
     }
-    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
     uint64_t Index = ByteOffset / EltSize;
     uint64_t Offset = ByteOffset - Index * EltSize;
 
Index: llvm/test/Transforms/InstCombine/load-gep-overalign.ll
===================================================================
--- llvm/test/Transforms/InstCombine/load-gep-overalign.ll
+++ llvm/test/Transforms/InstCombine/load-gep-overalign.ll
@@ -11,10 +11,6 @@
 ; Access and report each individual byte in @foo.
 ; OVERALIGNED and NATURAL should have the same result, because the layout of vectors ignores
 ; element type alignment, and thus the representation of @foo is the same in both cases.
-;
-; TODO: The OVERALIGNED result is incorrect, as apparently padding bytes
-; are assumed as they would appear in an array. In vectors, there is no padding.
-;
 ; NATURAL-LABEL: @test_vector_load_i8(
 ; NATURAL-NEXT:    call void @report(i64 0, i8 1)
 ; NATURAL-NEXT:    call void @report(i64 1, i8 35)
@@ -29,12 +25,12 @@
 ; OVERALIGNED-LABEL: @test_vector_load_i8(
 ; OVERALIGNED-NEXT:    call void @report(i64 0, i8 1)
 ; OVERALIGNED-NEXT:    call void @report(i64 1, i8 35)
-; OVERALIGNED-NEXT:    call void @report(i64 2, i8 0)
-; OVERALIGNED-NEXT:    call void @report(i64 3, i8 0)
-; OVERALIGNED-NEXT:    call void @report(i64 4, i8 69)
-; OVERALIGNED-NEXT:    call void @report(i64 5, i8 103)
-; OVERALIGNED-NEXT:    call void @report(i64 6, i8 0)
-; OVERALIGNED-NEXT:    call void @report(i64 7, i8 0)
+; OVERALIGNED-NEXT:    call void @report(i64 2, i8 69)
+; OVERALIGNED-NEXT:    call void @report(i64 3, i8 103)
+; OVERALIGNED-NEXT:    call void @report(i64 4, i8 -119)
+; OVERALIGNED-NEXT:    call void @report(i64 5, i8 -85)
+; OVERALIGNED-NEXT:    call void @report(i64 6, i8 -51)
+; OVERALIGNED-NEXT:    call void @report(i64 7, i8 -17)
 ; OVERALIGNED-NEXT:    ret void
 ;
   %ptr0 = getelementptr i8, ptr @foo, i64 0
Index: llvm/test/Transforms/InstCombine/load.ll
===================================================================
--- llvm/test/Transforms/InstCombine/load.ll
+++ llvm/test/Transforms/InstCombine/load.ll
@@ -413,3 +413,17 @@
   %d = load i32, ptr %b
   ret i32 %d
 }
+
+; TODO: Loads from vectors with non-byte-sized elements are not folded, since
+; the folding code assumes padding to the next byte boundary between elements.
+@foo = constant <2 x i4> <i4 u0x1, i4 u0x2>, align 8
+
+define i4 @test_vector_load_i4_non_byte_sized() {
+; CHECK-LABEL: @test_vector_load_i4_non_byte_sized(
+; CHECK-NEXT:    [[RES0:%.*]] = load i4, ptr @foo, align 8
+; CHECK-NEXT:    ret i4 [[RES0]]
+;
+  %ptr0 = getelementptr i8, ptr @foo, i64 0
+  %res0 = load i4, ptr %ptr0, align 1 ; expected to stay a load (see CHECK lines)
+  ret i4 %res0
+}