Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -245,6 +245,9 @@
     MPM.add(createLoopInterchangePass()); // Interchange loops
     MPM.add(createCFGSimplificationPass());
   }
+
+  MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+
   if (!DisableUnrollLoops)
     MPM.add(createSimpleLoopUnrollPass());    // Unroll small loops
   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
@@ -465,7 +468,6 @@
   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
   MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false));
 
-  MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
 
   // Eliminate loads by forwarding stores from the previous iteration to loads
   // of the current iteration.
Index: llvm/test/Other/pass-pipelines.ll
===================================================================
--- llvm/test/Other/pass-pipelines.ll
+++ llvm/test/Other/pass-pipelines.ll
@@ -49,6 +49,10 @@
 ; FIXME: It isn't clear that we need yet another loop pass pipeline
 ; and run of LICM here.
 ; CHECK-O2-NOT: Manager
+; CHECK-O2: Loop Vectorization
+; CHECK-O2-NOT: Manager
+; CHECK-O2: Loop Pass Manager
+; CHECK-O2-NEXT: Unroll loops
 ; CHECK-O2: Loop Pass Manager
 ; CHECK-O2-NEXT: Loop Invariant Code Motion
 ; CHECK-O2-NOT: Manager
@@ -69,8 +73,6 @@
 ; CHECK-O2: Loop Pass Manager
 ; CHECK-O2-NEXT: Rotate Loops
 ; CHECK-O2-NOT: Manager
-; CHECK-O2: Loop Vectorization
-; CHECK-O2-NOT: Manager
 ; CHECK-O2: SLP Vectorizer
 ; CHECK-O2-NOT: Manager
 ; After vectorization we do partial unrolling.
Index: llvm/test/Transforms/LoopVectorize/LoopWithConstTripCount1.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/LoopWithConstTripCount1.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+;This test verifies that a loop with a constant trip count gets vectorized.
+;Original C++ code:
+;struct V { static constexpr int length = 32; unsigned short data[32]; };
+;int reduce(V &v) {
+;  int sum = 0;
+;  for (int i = 0; i < v.length; ++i)
+;    sum += static_cast<int>(v.data[i]);
+;  return sum;
+;}
+;
+;https://llvm.org/bugs/show_bug.cgi?id=28090
+;
+;CHECK-LABEL:@_Z6reduceR1V
+;CHECK:load <4 x i16>
+;CHECK:zext <4 x i16>
+;CHECK:add <4 x i32>
+;CHECK:extractelement <4 x i32>
+
+%struct.V = type { [32 x i16] }
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z6reduceR1V(%struct.V* dereferenceable(64) %v) #0 { ; returns the i32 sum of the 32 zero-extended i16 elements of %v
+entry:
+  %v.addr = alloca %struct.V*, align 8 ; stack slot holding the %v argument
+  %sum = alloca i32, align 4 ; running total, set to 0 below
+  %i = alloca i32, align 4 ; loop counter, set to 0 below
+  store %struct.V* %v, %struct.V** %v.addr, align 8
+  store i32 0, i32* %sum, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load %struct.V*, %struct.V** %v.addr, align 8 ; %1 has no users in this function
+  %cmp = icmp slt i32 %0, 32 ; constant bound: the body runs for i = 0..31
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %2 to i64 ; widen the counter for use as a GEP index
+  %3 = load %struct.V*, %struct.V** %v.addr, align 8
+  %data = getelementptr inbounds %struct.V, %struct.V* %3, i32 0, i32 0 ; address of field 0, the [32 x i16] array
+  %arrayidx = getelementptr inbounds [32 x i16], [32 x i16]* %data, i64 0, i64 %idxprom ; address of element i
+  %4 = load i16, i16* %arrayidx, align 2
+  %conv = zext i16 %4 to i32 ; the CHECK lines expect this as "zext <4 x i16>" after vectorization
+  %5 = load i32, i32* %sum, align 4
+  %add = add nsw i32 %5, %conv ; sum += element
+  store i32 %add, i32* %sum, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %6 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %6, 1 ; ++i
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %7 = load i32, i32* %sum, align 4 ; final total is the return value
+  ret i32 %7
+}
+
Index: llvm/test/Transforms/LoopVectorize/LoopWithConstTripCount2.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/LoopWithConstTripCount2.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;This test verifies that a loop with a constant trip count gets vectorized.
+;Original C code:
+;
+;int a[32];
+;
+;int reduce_add() {
+;  int s = 0;
+;  for (int i = 0; i < 32; ++i)
+;    s = s + a[i];
+;  return s;
+;}
+;
+;The CHECK lines below expect <4 x i32> loads and adds, i.e. the loop must
+;be vectorized rather than just fully unrolled.
+;
+;https://llvm.org/bugs/show_bug.cgi?id=25748
+
+;CHECK-LABEL: @reduce_add
+;CHECK:load <4 x i32>
+;CHECK:add <4 x i32>
+;CHECK:extractelement <4 x i32>
+
+@a = common global [32 x i32] zeroinitializer, align 16
+
+define i32 @reduce_add() #0 { ; returns the i32 sum of the 32 elements of global @a
+entry:
+  %s = alloca i32, align 4 ; running total, set to 0 below
+  %i = alloca i32, align 4 ; loop counter, set to 0 below
+  store i32 0, i32* %s, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 32 ; constant bound: the body runs for i = 0..31
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %s, align 4
+  %2 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %2 to i64 ; widen the counter for use as a GEP index
+  %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @a, i64 0, i64 %idxprom ; address of a[i]
+  %3 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %1, %3 ; s + a[i]
+  store i32 %add, i32* %s, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %4 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %4, 1 ; ++i
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %5 = load i32, i32* %s, align 4 ; final total is the return value
+  ret i32 %5
+}
+