diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h --- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h @@ -71,6 +71,17 @@ SmallVector loops; }; +/// Adapters for building loop nests using the builder and the location stored +/// in ScopedContext. Actual builders are in scf::buildLoopNest. +scf::ValueVector loopNestBuilder(ValueRange lbs, ValueRange ubs, + ValueRange steps, + function_ref fun = nullptr); +scf::ValueVector loopNestBuilder(Value lb, Value ub, Value step, + function_ref fun = nullptr); +scf::ValueVector loopNestBuilder( + Value lb, Value ub, Value step, ValueRange iterArgInitValues, + function_ref fun = nullptr); + } // namespace edsc } // namespace mlir diff --git a/mlir/include/mlir/Dialect/SCF/SCF.h b/mlir/include/mlir/Dialect/SCF/SCF.h --- a/mlir/include/mlir/Dialect/SCF/SCF.h +++ b/mlir/include/mlir/Dialect/SCF/SCF.h @@ -40,9 +40,43 @@ ForOp getForInductionVarOwner(Value val); /// Returns the parallel loop parent of an induction variable. If the provided -// value is not an induction variable, then return nullptr. +/// value is not an induction variable, then return nullptr. ParallelOp getParallelForInductionVarOwner(Value val); +/// An owning vector of values, handy to return from functions. +using ValueVector = std::vector; + +/// Creates a perfect nest of "for" loops, i.e. all loops but the innermost +/// contain only another loop and a terminator. The lower, upper bounds and +/// steps are provided as `lbs`, `ubs` and `steps`, which are expected to be of +/// the same size. `iterArgs` points to the initial values of the loop iteration +/// arguments, which will be forwarded through the nest to the innermost loop. 
+/// The body of the loop is populated using `bodyBuilder`, which accepts an +/// ordered list of induction variables of all loops, followed by a list of +/// iteration arguments of the innermost loop, in the same order as provided to +/// `iterArgs`. This function is expected to return as many values as +/// `iterArgs`, of the same type and in the same order, that will be treated as +/// yielded from the loop body and forwarded back through the loop nest. If the +/// function is not provided, the loop nest is not expected to have iteration +/// arguments, the body of the innermost loop will be left empty, containing +/// only the zero-operand terminator. Returns the values yielded by the +/// outermost loop. If bound arrays are empty, the body builder will be called +/// once to construct the IR outside of the loop with an empty list of induction +/// variables. +ValueVector buildLoopNest( + OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, + ValueRange steps, ValueRange iterArgs, + function_ref + bodyBuilder = nullptr); + +/// A convenience version for building loop nests without iteration arguments +/// (like for reductions). Does not take the initial value of reductions or +/// expect the body building functions to return their current value. 
+ValueVector buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, + ValueRange ubs, ValueRange steps, + function_ref + bodyBuilder = nullptr); + } // end namespace scf } // end namespace mlir #endif // MLIR_DIALECT_SCF_H_ diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -137,10 +137,15 @@ let builders = [ OpBuilder<"OpBuilder &builder, OperationState &result, " "Value lowerBound, Value upperBound, Value step, " - "ValueRange iterArgs = llvm::None"> + "ValueRange iterArgs = llvm::None, " + "function_ref" + " = nullptr"> ]; let extraClassDeclaration = [{ + using BodyBuilderFn = + function_ref; + Value getInductionVar() { return getBody()->getArgument(0); } Block::BlockArgListType getRegionIterArgs() { return getBody()->getArguments().drop_front(); diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -512,8 +512,8 @@ Value tmp = std_alloc(tmpMemRefType(transfer)); StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); - SmallVector ivs(lbs.size()); - LoopNestBuilder(ivs, lbs, ubs, steps)([&] { + loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { + auto ivs = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. if (coalescedIdx >= 0) std::swap(ivs.back(), ivs[coalescedIdx]); @@ -586,8 +586,8 @@ StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); std_store(vectorValue, vec); - SmallVector ivs(lbs.size()); - LoopNestBuilder(ivs, lbs, ubs, steps)([&] { + loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { + auto ivs = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. 
if (coalescedIdx >= 0) std::swap(ivs.back(), ivs[coalescedIdx]); diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp --- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp @@ -79,11 +79,12 @@ // Produce the loop nest with copies. SmallVector ivs(lbs.size()); - LoopNestBuilder(ivs, lbs, ubs, steps)([&]() { + loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { + ivs.assign(loopIvs.begin(), loopIvs.end()); auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank); StdIndexedValue fromHandle(from), toHandle(to); toHandle(activeIvs) = fromHandle(activeIvs); }); // Map the innermost loops to threads in reverse order. for (auto en : diff --git a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp --- a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/SCF/EDSC/Builders.h" +#include "mlir/Dialect/SCF/SCF.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" @@ -110,3 +111,50 @@ result.enter(body); return result; } + +mlir::scf::ValueVector +mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps, + function_ref fun) { + // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into + // the expected function interface. 
+ assert(ScopedContext::getContext() && "EDSC ScopedContext not set up"); + return mlir::scf::buildLoopNest( + ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs, + steps, [&](OpBuilder &builder, Location loc, ValueRange ivs) { + ScopedContext context(builder, loc); + if (fun) + fun(ivs); + }); +} + +mlir::scf::ValueVector +mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step, + function_ref fun) { + // Delegates to the ValueRange-based version by wrapping the lambda. + auto wrapper = [&](ValueRange ivs) { + assert(ivs.size() == 1); + if (fun) + fun(ivs[0]); + }; + return loopNestBuilder(ValueRange(lb), ValueRange(ub), ValueRange(step), + wrapper); +} + +mlir::scf::ValueVector mlir::edsc::loopNestBuilder( + Value lb, Value ub, Value step, ValueRange iterArgInitValues, + function_ref fun) { + // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into + // the expected function interface. + assert(ScopedContext::getContext() && "EDSC ScopedContext not set up"); + return mlir::scf::buildLoopNest( + ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lb, ub, + step, iterArgInitValues, + [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) { + assert(ivs.size() == 1 && "expected one induction variable"); + ScopedContext context(builder, loc); + if (fun) + return fun(ivs[0], args); + return scf::ValueVector(iterArgInitValues.begin(), + iterArgInitValues.end()); + }); +} diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -40,18 +40,30 @@ //===----------------------------------------------------------------------===// void ForOp::build(OpBuilder &builder, OperationState &result, Value lb, - Value ub, Value step, ValueRange iterArgs) { + Value ub, Value step, ValueRange iterArgs, + BodyBuilderFn bodyBuilder) { result.addOperands({lb, ub, step}); result.addOperands(iterArgs); for (Value v : iterArgs) 
result.addTypes(v.getType()); Region *bodyRegion = result.addRegion(); - bodyRegion->push_back(new Block()); - if (iterArgs.empty()) - ForOp::ensureTerminator(*bodyRegion, builder, result.location); - bodyRegion->front().addArgument(builder.getIndexType()); + bodyRegion->push_back(new Block); + Block &bodyBlock = bodyRegion->front(); + bodyBlock.addArgument(builder.getIndexType()); for (Value v : iterArgs) - bodyRegion->front().addArgument(v.getType()); + bodyBlock.addArgument(v.getType()); + + // Create the default terminator if the builder is not provided and if the + // iteration arguments are not provided. Otherwise, leave this to the caller + // because we don't know which values to return from the loop. + if (iterArgs.empty() && !bodyBuilder) { + ForOp::ensureTerminator(*bodyRegion, builder, result.location); + } else if (bodyBuilder) { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(&bodyBlock); + bodyBuilder(builder, result.location, bodyBlock.getArgument(0), + bodyBlock.getArguments().drop_front()); + } } static LogicalResult verify(ForOp op) { @@ -229,6 +241,92 @@ regions.push_back(RegionSuccessor(getResults())); } +ValueVector mlir::scf::buildLoopNest( + OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, + ValueRange steps, ValueRange iterArgs, + function_ref + bodyBuilder) { + assert(lbs.size() == ubs.size() && + "expected the same number of lower and upper bounds"); + assert(lbs.size() == steps.size() && + "expected the same number of lower bounds and steps"); + + // If there are no bounds, call the body-building function and return early. + if (lbs.empty()) { + ValueVector results = + bodyBuilder ? 
bodyBuilder(builder, loc, ValueRange(), iterArgs) + : ValueVector(); + assert(results.size() == iterArgs.size() && + "loop nest body must return as many values as loop has iteration " + "arguments"); + return results; + } + + // First, create the loop structure iteratively using the body-builder + // callback of `ForOp::build`. Do not create `YieldOp`s yet. + OpBuilder::InsertionGuard guard(builder); + SmallVector loops; + SmallVector ivs; + loops.reserve(lbs.size()); + ivs.reserve(lbs.size()); + ValueRange currentIterArgs = iterArgs; + Location currentLoc = loc; + for (unsigned i = 0, e = lbs.size(); i < e; ++i) { + auto loop = builder.create( + currentLoc, lbs[i], ubs[i], steps[i], currentIterArgs, + [&](OpBuilder &nestedBuilder, Location nestedLoc, Value iv, + ValueRange args) { + ivs.push_back(iv); + // It is safe to store ValueRange args because it points to block + // arguments of a loop operation that we also own. + currentIterArgs = args; + currentLoc = nestedLoc; + }); + // Set the builder to point to the body of the newly created loop. We don't + // do this in the callback because the builder is reset when the callback + // returns. + builder.setInsertionPointToStart(loop.getBody()); + loops.push_back(loop); + } + + // For all loops but the innermost, yield the results of the nested loop. + for (unsigned i = 0, e = loops.size() - 1; i < e; ++i) { + builder.setInsertionPointToEnd(loops[i].getBody()); + builder.create(loc, loops[i + 1].getResults()); + } + + // In the body of the innermost loop, call the body building function if any + // and yield its results. + builder.setInsertionPointToStart(loops.back().getBody()); + ValueVector results = bodyBuilder + ? 
bodyBuilder(builder, currentLoc, ivs, + loops.back().getRegionIterArgs()) + : ValueVector(); + assert(results.size() == iterArgs.size() && + "loop nest body must return as many values as loop has iteration " + "arguments"); + builder.setInsertionPointToEnd(loops.back().getBody()); + builder.create(loc, results); + + // Return the results of the outermost loop. + return ValueVector(loops.front().result_begin(), loops.front().result_end()); +} + +ValueVector mlir::scf::buildLoopNest( + OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, + ValueRange steps, + function_ref bodyBuilder) { + // Delegate to the main function by wrapping the body builder. + return buildLoopNest(builder, loc, lbs, ubs, steps, llvm::None, + [&bodyBuilder](OpBuilder &nestedBuilder, + Location nestedLoc, ValueRange ivs, + ValueRange) -> ValueVector { + if (bodyBuilder) + bodyBuilder(nestedBuilder, nestedLoc, ivs); + return {}; + }); +} + //===----------------------------------------------------------------------===// // IfOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -139,10 +139,10 @@ OpBuilder builder(f.getBody()); ScopedContext scope(builder, f.getLoc()); - Value i, a(f.getArgument(0)), b(f.getArgument(1)), c(f.getArgument(2)), + Value a(f.getArgument(0)), b(f.getArgument(1)), c(f.getArgument(2)), d(f.getArgument(3)); using namespace edsc::op; - LoopNestBuilder(&i, a - b, c + d, a)(); + loopNestBuilder(a - b, c + d, a); // clang-format off // CHECK-LABEL: func @builder_loop_for(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { @@ -1076,16 +1076,14 @@ ScopedContext scope(builder, f.getLoc()); Value init0 = std_constant_float(llvm::APFloat(1.0f), f32Type); Value init1 = std_constant_float(llvm::APFloat(2.0f), f32Type); - Value i, a(f.getArgument(0)), 
b(f.getArgument(1)), c(f.getArgument(2)), + Value a(f.getArgument(0)), b(f.getArgument(1)), c(f.getArgument(2)), d(f.getArgument(3)); - Value args01[2]; - Value &arg0 = args01[0], &arg1 = args01[1]; using namespace edsc::op; - auto results = - LoopNestBuilder(&i, a - b, c + d, a, args01, {init0, init1})([&] { - auto sum = arg0 + arg1; - loop_yield(ArrayRef{arg1, sum}); - }); + auto results = loopNestBuilder(a - b, c + d, a, {init0, init1}, + [&](Value iv, ValueRange args) { + Value sum = args[0] + args[1]; + return scf::ValueVector{args[1], sum}; + }); results[0] + results[1]; // clang-format off