Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -966,27 +966,29 @@
                              ReductionFlags Flags) const;
 
   /// \returns True if the target wants to handle the given reduction idiom in
-  /// scalarized shuffle form instead of vectorized shuffle form.
+  /// variable-length-vector shuffle form (the vector is halved at each step)
+  /// instead of the fixed-length-vector form that getShuffleReduction() emits.
   /// E.g.
   ///
-  /// Scalarized shuffle form:
-  ///  %rdx.shuf = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
-  ///                            <i32 2, i32 3, i32 undef, i32 undef>
-  ///  %bin.rdx = fadd fast <4 x float> %a, %rdx.shuf
-  ///  %0 = extractelement <4 x float> %bin.rdx, i32 0
-  ///  %1 = extractelement <4 x float> %bin.rdx, i32 1
-  ///  %res = fadd fast float %0, %1 // scalar operation follows.
+  /// Variable-length-vector shuffle form:
+  ///  %rdx.shuf1 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32>
+  ///                             <i32 0, i32 1>
+  ///  %rdx.shuf2 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32>
+  ///                             <i32 2, i32 3>
+  ///  %bin.rdx = fadd fast <2 x float> %rdx.shuf1, %rdx.shuf2
+  ///  %0 = extractelement <2 x float> %bin.rdx, i32 0
+  ///  %1 = extractelement <2 x float> %bin.rdx, i32 1
+  ///  %res = fadd fast float %0, %1
   ///
-  /// Vectorized shuffle form:
+  /// Fixed-length-vector shuffle form:
   ///  %rdx.shuf = shufflevector <4 x float> %a, <4 x float> undef,
   ///                            <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
   ///  %bin.rdx = fadd fast <4 x float> %a, %rdx.shuf
   ///  %rdx.shuf1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
   ///                             <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  ///  %bin.rdx2 = fadd fast <4 x float> %bin.rdx, %rdx.shuf1 // vector operation
-  ///                                                         // follows.
+  ///  %bin.rdx2 = fadd fast <4 x float> %bin.rdx, %rdx.shuf1
   ///  %res = extractelement <4 x float> %bin.rdx2, i32 0
-  bool useScalarizedShuffleReduction() const;
+  bool useVariableLengthShuffleReduction() const;
 
   /// \returns True if the target wants to expand the given reduction intrinsic
   /// into a shuffle sequence.
@@ -1189,7 +1191,7 @@
                                         VectorType *VecTy) const = 0;
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
-  virtual bool useScalarizedShuffleReduction() const = 0;
+  virtual bool useVariableLengthShuffleReduction() const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
   virtual int getInstructionLatency(const Instruction *I) = 0;
 };
@@ -1585,8 +1587,8 @@
                              ReductionFlags Flags) const override {
     return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
   }
-  bool useScalarizedShuffleReduction() const override {
-    return Impl.useScalarizedShuffleReduction();
+  bool useVariableLengthShuffleReduction() const override {
+    return Impl.useVariableLengthShuffleReduction();
   }
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -564,7 +564,7 @@
     return false;
   }
 
-  bool useScalarizedShuffleReduction() const {
+  bool useVariableLengthShuffleReduction() const {
     return false;
   }
 
Index: include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- include/llvm/Transforms/Utils/LoopUtils.h
+++ include/llvm/Transforms/Utils/LoopUtils.h
@@ -517,14 +517,10 @@
                     ArrayRef<Value *> RedOps = None);
 
 /// Generates a vector reduction using shufflevectors to reduce the value.
-/// If \p ScalarizationFollows is set, getShuffleReduction() generates
-/// scalar result instead of vector result (Shuffles are followed by a
-/// scalar operation instead of a vector operation).
 Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                            RecurrenceDescriptor::MinMaxRecurrenceKind
                                MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
-                           ArrayRef<Value *> RedOps = None,
-                           bool ScalarizationFollows = false);
+                           ArrayRef<Value *> RedOps = None);
 
 /// Create a target reduction of the given vector. The reduction operation
 /// is described by the \p Opcode parameter. min/max reductions require
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -622,8 +622,8 @@
   return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
 }
 
-bool TargetTransformInfo::useScalarizedShuffleReduction() const {
-  return TTIImpl->useScalarizedShuffleReduction();
+bool TargetTransformInfo::useVariableLengthShuffleReduction() const {
+  return TTIImpl->useVariableLengthShuffleReduction();
 }
 
 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@@ -1581,10 +1582,44 @@
 
 // Helper to generate a log2 shuffle reduction.
 Value *
+getVariableLengthShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
+                                  RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+                                  ArrayRef<Value *> RedOps) {
+  unsigned VF = Src->getType()->getVectorNumElements();
+  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+  // and vector ops, reducing the set of values being computed by half each
+  // round.
+  assert(isPowerOf2_32(VF) &&
+         "Reduction emission only supported for pow2 vectors!");
+  Value *TmpVec = Src;
+
+  for (unsigned i = VF; i != 2; i >>= 1) {
+    // Extract the lower half.
+    Value *Shuf1 = Builder.CreateShuffleVector(
+                   TmpVec, UndefValue::get(TmpVec->getType()),
+                   createSequentialMask(Builder, 0, i/2, 0), "rdx.shuf1");
+
+    // Extract the uppoer half.
+    Value *Shuf2 = Builder.CreateShuffleVector(
+        TmpVec, UndefValue::get(TmpVec->getType()),
+        createSequentialMask(Builder, i / 2, i / 2, 0), "rdx.shuf2");
+    TmpVec = createReductionOp(Builder, Shuf1, Shuf2, Op, MinMaxKind, RedOps);
+  }
+
+  // The result comes from performing the scalar operation on the first two
+  // elements of the vector.
+  return createReductionOp(
+    Builder,
+    Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)),
+    Builder.CreateExtractElement(TmpVec, Builder.getInt32(1)),
+    Op, MinMaxKind, RedOps);
+}
+
+// Helper to generate a log2 shuffle reduction.
+Value *
 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                           RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
-                          ArrayRef<Value *> RedOps,
-                          bool ScalarizedShufRed) {
+                          ArrayRef<Value *> RedOps) {
   unsigned VF = Src->getType()->getVectorNumElements();
   // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
   // and vector ops, reducing the set of values being computed by half each
@@ -1593,9 +1628,7 @@
          "Reduction emission only supported for pow2 vectors!");
   Value *TmpVec = Src;
   SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
-  unsigned UB = ScalarizedShufRed ? 2 : 1;
-
-  for (unsigned i = VF; i != UB; i >>= 1) {
+  for (unsigned i = VF; i != 1; i >>= 1) {
     // Move the upper half of the vector to the lower half.
     for (unsigned j = 0; j != i / 2; ++j)
       ShuffleMask[j] = Builder.getInt32(i / 2 + j);
@@ -1610,17 +1643,8 @@
     TmpVec = createReductionOp(Builder, TmpVec, Shuf, Op, MinMaxKind, RedOps);
   }
 
-  if (!ScalarizedShufRed)
-    // The result is in the first element of the vector.
-    return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
-
-  // The result comes from performing the scalar operation on the first two
-  // elements of the vector.
-  return createReductionOp(
-    Builder,
-    Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)),
-    Builder.CreateExtractElement(TmpVec, Builder.getInt32(1)),
-    Op, MinMaxKind, RedOps);
+  // The result is in the first element of the vector.
+  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
 }
 
 /// Create a simple vector reduction specified by an opcode and some
@@ -1698,8 +1722,10 @@
   if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
     return BuildFunc();
 
-  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps,
-                             TTI->useScalarizedShuffleReduction());
+  if (TTI->useVariableLengthShuffleReduction())
+    return getVariableLengthShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
+
+  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
 }
 
 /// Create a vector reduction using a given recurrence descriptor.