diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
--- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
@@ -71,6 +71,17 @@
   SmallVector<LoopBuilder, 4> loops;
 };
 
+/// Adapters for building loop nests using the builder and the location stored
+/// in ScopedContext. They delegate to scf::buildLoopNest; `fun` is optional.
+scf::ValueVector loopNestBuilder(ValueRange lbs, ValueRange ubs,
+                                 ValueRange steps,
+                                 function_ref<void(ValueRange)> fun = nullptr);
+scf::ValueVector loopNestBuilder(Value lb, Value ub, Value step,
+                                 function_ref<void(Value)> fun = nullptr);
+scf::ValueVector loopNestBuilder(
+    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
+    function_ref<scf::ValueVector(Value, ValueRange)> fun = nullptr);
+
 } // namespace edsc
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/SCF/SCF.h b/mlir/include/mlir/Dialect/SCF/SCF.h
--- a/mlir/include/mlir/Dialect/SCF/SCF.h
+++ b/mlir/include/mlir/Dialect/SCF/SCF.h
@@ -40,9 +40,43 @@
 ForOp getForInductionVarOwner(Value val);
 
 /// Returns the parallel loop parent of an induction variable. If the provided
-// value is not an induction variable, then return nullptr.
+/// value is not an induction variable, then return nullptr.
 ParallelOp getParallelForInductionVarOwner(Value val);
 
+/// An owning vector of values, handy to return from functions.
+using ValueVector = std::vector<Value>;
+
+/// Creates a perfect nest of "for" loops, i.e. all loops but the innermost
+/// contain only another loop and a terminator. The lower, upper bounds and
+/// steps are provided as `lbs`, `ubs` and `steps`, which are expected to be of
+/// the same size. `iterArgs` points to the initial values of the loop iteration
+/// arguments, which will be forwarded through the nest to the innermost loop.
+/// The body of the loop is populated using `bodyBuilder`, which accepts an
+/// ordered list of induction variables of all loops, followed by a list of
+/// iteration arguments of the innermost loop, in the same order as provided to
+/// `iterArgs`. This function is expected to return as many values as
+/// `iterArgs`, of the same type and in the same order, that will be treated as
+/// yielded from the loop body and forwarded back through the loop nest. If the
+/// function is not provided, the loop nest is not expected to have iteration
+/// arguments; the body of the innermost loop will be left empty, containing
+/// only the zero-operand terminator. Returns the values yielded by the
+/// outermost loop. If bound arrays are empty, the body builder will be called
+/// once to construct the IR outside of the loop with an empty list of induction
+/// variables.
+ValueVector buildLoopNest(
+    OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs,
+    ValueRange steps, ValueRange iterArgs,
+    function_ref<ValueVector(OpBuilder &, Location, ValueRange, ValueRange)>
+        bodyBuilder = nullptr);
+
+/// A convenience version for building loop nests without iteration arguments
+/// (such as those used for reductions). Does not take initial values or expect
+/// the body-building function to return the current values.
+ValueVector buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs,
+                          ValueRange ubs, ValueRange steps,
+                          function_ref<void(OpBuilder &, Location, ValueRange)>
+                              bodyBuilder = nullptr);
+
 } // end namespace scf
 } // end namespace mlir
 #endif // MLIR_DIALECT_SCF_H_
diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td
--- a/mlir/include/mlir/Dialect/SCF/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td
@@ -137,10 +137,15 @@
   let builders = [
     OpBuilder<"OpBuilder &builder, OperationState &result, "
               "Value lowerBound, Value upperBound, Value step, "
-              "ValueRange iterArgs = llvm::None">
+              "ValueRange iterArgs = llvm::None, "
+              "function_ref<void(OpBuilder &, Location, Value, ValueRange)>"
+              "    = nullptr">
   ];
 
   let extraClassDeclaration = [{
+    using BodyBuilderFn =
+        function_ref<void(OpBuilder &, Location, Value, ValueRange)>;
+
     Value getInductionVar() { return getBody()->getArgument(0); }
     Block::BlockArgListType getRegionIterArgs() {
       return getBody()->getArguments().drop_front();
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -512,8 +512,8 @@
   Value tmp = std_alloc(tmpMemRefType(transfer));
   StdIndexedValue local(tmp);
   Value vec = vector_type_cast(tmp);
-  SmallVector<Value, 8> ivs(lbs.size());
-  LoopNestBuilder(ivs, lbs, ubs, steps)([&] {
+  loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
+    auto ivs = llvm::to_vector<8>(loopIvs);
     // Swap the ivs which will reorder memory accesses.
     if (coalescedIdx >= 0)
       std::swap(ivs.back(), ivs[coalescedIdx]);
@@ -586,8 +586,8 @@
   StdIndexedValue local(tmp);
   Value vec = vector_type_cast(tmp);
   std_store(vectorValue, vec);
-  SmallVector<Value, 8> ivs(lbs.size());
-  LoopNestBuilder(ivs, lbs, ubs, steps)([&] {
+  loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
+    auto ivs = llvm::to_vector<8>(loopIvs);
     // Swap the ivs which will reorder memory accesses.
     if (coalescedIdx >= 0)
       std::swap(ivs.back(), ivs[coalescedIdx]);
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -79,11 +79,13 @@
 
   // Produce the loop nest with copies.
   SmallVector<Value, 8> ivs(lbs.size());
-  LoopNestBuilder(ivs, lbs, ubs, steps)([&]() {
+  loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
+    // Copy the induction variables out so they outlive the body callback.
+    ivs.assign(loopIvs.begin(), loopIvs.end());
     auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
     StdIndexedValue fromHandle(from), toHandle(to);
     toHandle(activeIvs) = fromHandle(activeIvs);
   });
 
   // Map the innermost loops to threads in reverse order.
   for (auto en :
diff --git a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
--- a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/SCF/EDSC/Builders.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 
@@ -110,3 +111,50 @@
   result.enter(body);
   return result;
 }
+
+mlir::scf::ValueVector
+mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
+                            function_ref<void(ValueRange)> fun) {
+  // Forwards to scf::buildLoopNest. The wrapper creates a ScopedContext from
+  // the builder/location it receives before calling `fun` (skipped if null).
+  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
+  return mlir::scf::buildLoopNest(
+      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
+      steps, [&](OpBuilder &builder, Location loc, ValueRange ivs) {
+        ScopedContext context(builder, loc);
+        if (fun)
+          fun(ivs);
+      });
+}
+
+mlir::scf::ValueVector
+mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
+                            function_ref<void(Value)> fun) {
+  // Reuses the ValueRange-based overload; the wrapper unpacks the single IV.
+  auto wrapper = [&](ValueRange ivs) {
+    assert(ivs.size() == 1);
+    if (fun)
+      fun(ivs[0]);
+  };
+  return loopNestBuilder(ValueRange(lb), ValueRange(ub), ValueRange(step),
+                         wrapper);
+}
+
+mlir::scf::ValueVector mlir::edsc::loopNestBuilder(
+    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
+    function_ref<scf::ValueVector(Value, ValueRange)> fun) {
+  // Forwards to scf::buildLoopNest for a single loop. When `fun` is null, the
+  // wrapper returns the initial iteration argument values as the yielded ones.
+  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
+  return mlir::scf::buildLoopNest(
+      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lb, ub,
+      step, iterArgInitValues,
+      [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
+        assert(ivs.size() == 1 && "expected one induction variable");
+        ScopedContext context(builder, loc);
+        if (fun)
+          return fun(ivs[0], args);
+        return scf::ValueVector(iterArgInitValues.begin(),
+                                iterArgInitValues.end());
+      });
+}
diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -40,18 +40,30 @@
 //===----------------------------------------------------------------------===//
 
 void ForOp::build(OpBuilder &builder, OperationState &result, Value lb,
-                  Value ub, Value step, ValueRange iterArgs) {
+                  Value ub, Value step, ValueRange iterArgs,
+                  BodyBuilderFn bodyBuilder) {
   result.addOperands({lb, ub, step});
   result.addOperands(iterArgs);
   for (Value v : iterArgs)
     result.addTypes(v.getType());
   Region *bodyRegion = result.addRegion();
-  bodyRegion->push_back(new Block());
-  if (iterArgs.empty())
-    ForOp::ensureTerminator(*bodyRegion, builder, result.location);
-  bodyRegion->front().addArgument(builder.getIndexType());
+  bodyRegion->push_back(new Block);
+  Block &bodyBlock = bodyRegion->front();
+  bodyBlock.addArgument(builder.getIndexType());
   for (Value v : iterArgs)
-    bodyRegion->front().addArgument(v.getType());
+    bodyBlock.addArgument(v.getType());
+
+  // Create the default terminator only if there are neither iteration
+  // arguments nor a body builder. Otherwise the body builder (if any) or the
+  // caller must add it, since we don't know which values the loop yields.
+  if (iterArgs.empty() && !bodyBuilder) {
+    ForOp::ensureTerminator(*bodyRegion, builder, result.location);
+  } else if (bodyBuilder) {
+    OpBuilder::InsertionGuard guard(builder);
+    builder.setInsertionPointToStart(&bodyBlock);
+    bodyBuilder(builder, result.location, bodyBlock.getArgument(0),
+                bodyBlock.getArguments().drop_front());
+  }
 }
 
 static LogicalResult verify(ForOp op) {
@@ -229,6 +241,92 @@
   regions.push_back(RegionSuccessor(getResults()));
 }
 
+ValueVector mlir::scf::buildLoopNest(
+    OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs,
+    ValueRange steps, ValueRange iterArgs,
+    function_ref<ValueVector(OpBuilder &, Location, ValueRange, ValueRange)>
+        bodyBuilder) {
+  assert(lbs.size() == ubs.size() &&
+         "expected the same number of lower and upper bounds");
+  assert(lbs.size() == steps.size() &&
+         "expected the same number of lower bounds and steps");
+
+  // If there are no bounds, call the body-building function and return early.
+  if (lbs.empty()) {
+    ValueVector results =
+        bodyBuilder ? bodyBuilder(builder, loc, ValueRange(), iterArgs)
+                    : ValueVector();
+    assert(results.size() == iterArgs.size() &&
+           "loop nest body must return as many values as loop has iteration "
+           "arguments");
+    return results;
+  }
+
+  // First, create the loop structure iteratively using the body-builder
+  // callback of `ForOp::build`. Do not create `YieldOp`s yet.
+  OpBuilder::InsertionGuard guard(builder);
+  SmallVector<scf::ForOp, 4> loops;
+  SmallVector<Value, 4> ivs;
+  loops.reserve(lbs.size());
+  ivs.reserve(lbs.size());
+  ValueRange currentIterArgs = iterArgs;
+  Location currentLoc = loc;
+  for (unsigned i = 0, e = lbs.size(); i < e; ++i) {
+    auto loop = builder.create<scf::ForOp>(
+        currentLoc, lbs[i], ubs[i], steps[i], currentIterArgs,
+        [&](OpBuilder &nestedBuilder, Location nestedLoc, Value iv,
+            ValueRange args) {
+          ivs.push_back(iv);
+          // It is safe to store ValueRange args because it points to block
+          // arguments of a loop operation that we also own.
+          currentIterArgs = args;
+          currentLoc = nestedLoc;
+        });
+    // Set the builder to point to the body of the newly created loop. We don't
+    // do this in the callback because the builder is reset when the callback
+    // returns.
+    builder.setInsertionPointToStart(loop.getBody());
+    loops.push_back(loop);
+  }
+
+  // For all loops but the innermost, yield the results of the nested loop.
+  for (unsigned i = 0, e = loops.size() - 1; i < e; ++i) {
+    builder.setInsertionPointToEnd(loops[i].getBody());
+    builder.create<scf::YieldOp>(loc, loops[i + 1].getResults());
+  }
+
+  // In the body of the innermost loop, call the body building function if any
+  // and yield its results.
+  builder.setInsertionPointToStart(loops.back().getBody());
+  ValueVector results = bodyBuilder
+                            ? bodyBuilder(builder, currentLoc, ivs,
+                                          loops.back().getRegionIterArgs())
+                            : ValueVector();
+  assert(results.size() == iterArgs.size() &&
+         "loop nest body must return as many values as loop has iteration "
+         "arguments");
+  builder.setInsertionPointToEnd(loops.back().getBody());
+  builder.create<scf::YieldOp>(loc, results);
+
+  // Return the results of the outermost loop.
+  return ValueVector(loops.front().result_begin(), loops.front().result_end());
+}
+
+ValueVector mlir::scf::buildLoopNest(
+    OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs,
+    ValueRange steps,
+    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilder) {
+  // Delegate to the iter-args overload; the wrapper yields no values.
+  return buildLoopNest(builder, loc, lbs, ubs, steps, llvm::None,
+                       [&bodyBuilder](OpBuilder &nestedBuilder,
+                                      Location nestedLoc, ValueRange ivs,
+                                      ValueRange) -> ValueVector {
+                         if (bodyBuilder)
+                           bodyBuilder(nestedBuilder, nestedLoc, ivs);
+                         return {};
+                       });
+}
+
 //===----------------------------------------------------------------------===//
 // IfOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -139,10 +139,10 @@
 
   OpBuilder builder(f.getBody());
   ScopedContext scope(builder, f.getLoc());
-  Value i, a(f.getArgument(0)), b(f.getArgument(1)), c(f.getArgument(2)),
+  Value a(f.getArgument(0)), b(f.getArgument(1)), c(f.getArgument(2)),
       d(f.getArgument(3));
   using namespace edsc::op;
-  LoopNestBuilder(&i, a - b, c + d, a)();
+  loopNestBuilder(a - b, c + d, a);
 
   // clang-format off
   // CHECK-LABEL: func @builder_loop_for(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
@@ -1076,16 +1076,14 @@
   ScopedContext scope(builder, f.getLoc());
   Value init0 = std_constant_float(llvm::APFloat(1.0f), f32Type);
   Value init1 = std_constant_float(llvm::APFloat(2.0f), f32Type);
-  Value i, a(f.getArgument(0)), b(f.getArgument(1)), c(f.getArgument(2)),
+  Value a(f.getArgument(0)), b(f.getArgument(1)), c(f.getArgument(2)),
       d(f.getArgument(3));
-  Value args01[2];
-  Value &arg0 = args01[0], &arg1 = args01[1];
   using namespace edsc::op;
-  auto results =
-      LoopNestBuilder(&i, a - b, c + d, a, args01, {init0, init1})([&] {
-        auto sum = arg0 + arg1;
-        loop_yield(ArrayRef<Value>{arg1, sum});
-      });
+  auto results = loopNestBuilder(a - b, c + d, a, {init0, init1},
+                                 [&](Value iv, ValueRange args) {
+                                   Value sum = args[0] + args[1];
+                                   return scf::ValueVector{args[1], sum};
+                                 });
   results[0] + results[1];
 
   // clang-format off