diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -116,9 +116,13 @@ for (unsigned idx = 0; idx < attr.getInt(); idx++) symbolsPos += ranks[idx]; - // Append or rewrite the end of the value list that corresponds to the + // Append the end of the value list that corresponds to the // values mapping to symbols. Since inside concatinated map symbols are // repeated we have to repeat the sizes as well. + + // Reserving up front is mandatory: pushing an element of a SmallVector + // back into that same vector can be undefined behavior if it reallocates. + res.reserve(res.size() + ranks.size() * numSymb); for (unsigned idx = 0, s = ranks.size(); idx < s; ++idx) for (unsigned idx2 = 0; idx2 < numSymb; ++idx2) res.push_back(res[symbolsPos + idx2]); @@ -131,7 +135,7 @@ /// `createAndFold` builder method. If `folder` is null, the regular `create` /// method is called. SmallVector applyMapToValues(OpBuilder &b, Location loc, - AffineMap map, ArrayRef values, + AffineMap map, ValueRange values, OperationFolder *folder = nullptr); /// Returns all the operands of `linalgOp` that are not views. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -76,6 +76,76 @@ return res; } +/// Creates a number of ranges equal to the number of dimensions in the `map`. +/// For now the function supports only a limited set of expression forms in the +/// map results. It expects a non-inverted, concatenated map; the last values in +/// `viewSizes` are applied to the symbols in the map. 
+static SmallVector +emitLoopRangesWithSymbols(OpBuilder &b, Location loc, AffineMap map, + ValueRange viewSizes) { + unsigned numDims = map.getNumDims(), numRes = map.getNumResults(); + unsigned numSym = map.getNumSymbols(); + assert(viewSizes.size() == numRes + numSym && + "viewSizes must contain sizes of all views and values for symbols"); + SmallVector res(numDims); + for (unsigned idx = 0; idx < numRes; ++idx) { + auto result = map.getResult(idx); + if (auto d = result.dyn_cast()) { + if (res[d.getPosition()].offset) + continue; + res[d.getPosition()] = SubViewOp::Range{ + std_constant_index(0), viewSizes[idx], std_constant_index(1)}; + } + + // If the access pattern is of form (m, n)[s] -> (m + n - s floordiv 2), + // then the bounds are: + // (s floordiv 2) <= m < (size(m) + s floordiv 2 - s + 1), + // where size(n) is applied to the symbol s. + // For now only this exact pattern is matched (hard-coded). + if (auto binOp = result.dyn_cast()) { + auto lhs = binOp.getLHS().dyn_cast(); + auto rhs = binOp.getRHS().dyn_cast(); + if (!lhs || !rhs || binOp.getKind() != AffineExprKind::Add || + lhs.getKind() != AffineExprKind::Add || + rhs.getKind() != mlir::AffineExprKind::Mul) + continue; + + auto m = lhs.getLHS().dyn_cast(); + auto n = lhs.getRHS().dyn_cast(); + auto fDiv = rhs.getLHS().dyn_cast(); + auto minusOne = rhs.getRHS().dyn_cast(); + if (!m || !n || !fDiv || !minusOne || + fDiv.getKind() != AffineExprKind::FloorDiv || + fDiv.getLHS().getKind() != AffineExprKind::SymbolId || + fDiv.getRHS().getKind() != AffineExprKind::Constant) + continue; + + int64_t c = fDiv.getRHS().dyn_cast().getValue(); + auto s = fDiv.getLHS().dyn_cast(); + int64_t c2 = minusOne.getValue(); + if (c != 2 || c2 != -1) + continue; + + int mPos = m.getPosition(); + AffineExpr one = getAffineConstantExpr(1, s.getContext()); + // Construction of upper offset affine part (s floordiv 2 - s + 1). 
+ AffineExpr upperOffsetExpr = fDiv + one - s; + AffineMap fromMap = AffineMap::get(numDims, numSym, fDiv); + AffineMap offsetMap = AffineMap::get(numDims, numSym, upperOffsetExpr); + SmallVector values(viewSizes.begin(), + viewSizes.begin() + numDims); + values.insert(values.end(), viewSizes.begin() + numRes, viewSizes.end()); + // Construction of the lower offset (s floordiv 2). + Value from = applyMapToValues(b, loc, fromMap, values).front(); + // Construction of the upper offset (size(m) + s floordiv 2 - s + 1). + Value upperOffset = applyMapToValues(b, loc, offsetMap, values).front(); + auto to = b.create(loc, viewSizes[mPos], upperOffset); + res[mPos] = SubViewOp::Range{from, to, std_constant_index(1)}; + } + } + return res; +} + template static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, ArrayRef> indexing, @@ -469,32 +539,24 @@ llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); })); SmallVector sizes = getViewSizes(builder, linalgOp); AffineMap map = concatAffineMaps(maps); + SmallVector loopRanges; + if (map.getNumSymbols()) { - // Ignore symbols for now as they are not supported by inversePermutation. 
- unsigned dims = map.getNumDims(); - SmallVector zeros( - map.getNumSymbols(), getAffineConstantExpr(0, map.getContext())); - SmallVector res; - for (auto result : map.getResults()) - res.push_back(result.replaceDimsAndSymbols({}, zeros)); - - map = AffineMap::get(dims, 0, res, map.getContext()); - - // Cut off values that would have been applied to symbols - sizes.resize(res.size()); - } + loopRanges = emitLoopRangesWithSymbols(scope.getBuilderRef(), + scope.getLocation(), map, sizes); + } else { + AffineMap invertedMap = inversePermutation(map); + if (!invertedMap) + return {}; + if (invertedMap.isEmpty()) { + emitScalarImplementation({}, linalgOp); + return LinalgLoops(); + } - AffineMap invertedMap = inversePermutation(map); - if (!invertedMap) - return {}; - if (invertedMap.isEmpty()) { - emitScalarImplementation({}, linalgOp); - return LinalgLoops(); + loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), + invertedMap, sizes); } - SmallVector allIvs; - auto loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), - invertedMap, sizes); GenerateLoopNest::doit( loopRanges, linalgOp.iterator_types().getValue(), [&](ValueRange ivs) { allIvs.append(ivs.begin(), ivs.end()); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -57,7 +57,7 @@ static Value emitOrFoldComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef operandsRef, + ValueRange operandsRef, OperationFolder *folder) { SmallVector operands(operandsRef.begin(), operandsRef.end()); fullyComposeAffineMapAndOperands(&map, &operands); @@ -68,16 +68,16 @@ SmallVector mlir::linalg::applyMapToValues(OpBuilder &b, Location loc, AffineMap map, - ArrayRef values, + ValueRange values, OperationFolder *folder) { SmallVector res; res.reserve(map.getNumResults()); - unsigned numDims = map.getNumDims(); + unsigned numDims = 
map.getNumDims(), numSym = map.getNumSymbols(); // For each `expr` in `map`, applies the `expr` to the values extracted from // ranges. If the resulting application can be folded into a Value, the // folding occurs eagerly. Otherwise, an affine.apply operation is emitted. for (auto expr : map.getResults()) { - AffineMap map = AffineMap::get(numDims, 0, expr); + AffineMap map = AffineMap::get(numDims, numSym, expr); res.push_back(emitOrFoldComposedAffineApply(b, loc, map, values, folder)); } return res; diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -14,6 +14,8 @@ // CHECKLOOP-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKLOOP-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKLOOP-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKLOOP-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> +// CHECKLOOP-DAG: #[[$convUpperOffset:.*]] = affine_map<()[s0] -> (s0 floordiv 2 - s0 + 1)> // CHECKLOOP-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> @@ -26,6 +28,8 @@ // CHECKPARALLEL-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKPARALLEL-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKPARALLEL-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKPARALLEL-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> +// CHECKPARALLEL-DAG: #[[$convUpperOffset:.*]] = affine_map<()[s0] -> (s0 floordiv 2 - s0 + 1)> // CHECKPARALLEL-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> @@ -947,10 +951,13 @@ // CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: 
%[[c0:.*]] = constant 0 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKLOOP: %[[upperHalf:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim1]]] +// CHECKLOOP: %[[sizeMinusHalf:.*]] = addi %[[dim0]], %[[upperHalf]] : index +// CHECKLOOP: scf.for %[[b:.*]] = %[[half]] to %[[sizeMinusHalf]] step %{{.*}} { +// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { // CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKLOOP: %[[va:.*]] = load %[[arg0]][%[[aff]]] : memref @@ -965,9 +972,12 @@ // CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%{{.*}}, %{{.*}}) to (%[[dim1]], %[[dim0]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKPARALLEL: %[[upperHalf:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim1]]] +// CHECKPARALLEL: %[[sizeMinusHalf:.*]] = addi %[[dim0]], %[[upperHalf]] : index +// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%[[half]], %{{.*}}) to (%[[sizeMinusHalf]], %[[dim1]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], 
%[[c0]] : memref // CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKPARALLEL: %[[va:.*]] = load %[[arg0]][%[[aff]]] : memref @@ -1012,14 +1022,20 @@ // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKLOOP: %[[upperHalf1:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim2]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = addi %[[dim0]], %[[upperHalf1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[upperHalf2:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = addi %[[dim1]], %[[upperHalf2]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { // CHECKLOOP: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKLOOP: %[[dim5:.*]] = dim 
%[[arg1]], %[[c1]] : memref // CHECKLOOP: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1037,11 +1053,17 @@ // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim2]], %[[dim3]], %[[dim0]], %[[dim1]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKPARALLEL: %[[upperHalf1:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim2]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = addi %[[dim0]], %[[upperHalf1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[upperHalf2:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = addi %[[dim1]], %[[upperHalf2]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%[[half1]], %[[half2]], %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[dim2]], %[[dim3]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg1]], %[[c1]] : memref // CHECKPARALLEL: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1089,18 +1111,27 @@ // CHECKLOOP: %[[c0:.*]] = constant 0 : index 
// CHECKLOOP: %[[c1:.*]] = constant 1 : index // CHECKLOOP: %[[c2:.*]] = constant 2 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[upperHalf1:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = addi %[[dim0]], %[[upperHalf1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[upperHalf2:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = addi %[[dim1]], %[[upperHalf2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[upperHalf3:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = addi %[[dim2]], %[[upperHalf3]] : index +// CHECKLOOP: scf.for 
%[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to %[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { // CHECKLOOP: %[[dim6:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKLOOP: %[[dim7:.*]] = dim %[[arg1]], %[[c1]] : memref // CHECKLOOP: %[[dim8:.*]] = dim %[[arg1]], %[[c2]] : memref @@ -1121,13 +1152,22 @@ // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[c2:.*]] = constant 2 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim3]], %[[dim4]], %[[dim5]], %[[dim0]], %[[dim1]], %[[dim2]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[upperHalf1:.*]] = affine.apply 
#[[$convUpperOffset]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = addi %[[dim0]], %[[upperHalf1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[upperHalf2:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = addi %[[dim1]], %[[upperHalf2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[upperHalf3:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = addi %[[dim2]], %[[upperHalf3]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %{{.*}}, %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[dim3]], %[[dim4]], %[[dim5]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg1]], %[[c1]] : memref // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg1]], %[[c2]] : memref @@ -1178,22 +1218,34 @@ // CHECKLOOP: %[[c1:.*]] = constant 1 : index // CHECKLOOP: %[[c2:.*]] = constant 2 : index // CHECKLOOP: %[[c3:.*]] = constant 3 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKLOOP: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { -// CHECKLOOP: scf.for 
%[[i3:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim6:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKLOOP: %[[dim7:.*]] = dim %[[arg1]], %[[c3]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[upperHalf1:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = addi %[[dim0]], %[[upperHalf1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[upperHalf2:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = addi %[[dim1]], %[[upperHalf2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKLOOP: %[[upperHalf3:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim6]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = addi %[[dim2]], %[[upperHalf3]] : index +// CHECKLOOP: %[[half4:.*]] = affine.apply #[[$convHalf]]()[%[[dim7]]] +// CHECKLOOP: %[[upperHalf4:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim7]]] +// CHECKLOOP: %[[sizeMinusHalf4:.*]] = addi %[[dim3]], %[[upperHalf4]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to 
%[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %[[half4]] to %[[sizeMinusHalf4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { // CHECKLOOP: %[[dim8:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKLOOP: %[[dim9:.*]] = dim %[[arg1]], %[[c1]] : memref // CHECKLOOP: %[[dim10:.*]] = dim %[[arg1]], %[[c2]] : memref @@ -1217,15 +1269,27 @@ // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[c2:.*]] = constant 2 : index // CHECKPARALLEL: %[[c3:.*]] = constant 3 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]], %[[dim0]], %[[dim1]], %[[dim2]], %[[dim3]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg1]], %[[c1]] : 
memref +// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg1]], %[[c3]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[upperHalf1:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = addi %[[dim0]], %[[upperHalf1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[upperHalf2:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = addi %[[dim1]], %[[upperHalf2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKPARALLEL: %[[upperHalf3:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim6]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = addi %[[dim2]], %[[upperHalf3]] : index +// CHECKPARALLEL: %[[half4:.*]] = affine.apply #[[$convHalf]]()[%[[dim7]]] +// CHECKPARALLEL: %[[upperHalf4:.*]] = affine.apply #[[$convUpperOffset]]()[%[[dim7]]] +// CHECKPARALLEL: %[[sizeMinusHalf4:.*]] = addi %[[dim3]], %[[upperHalf4]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %[[half4]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[sizeMinusHalf4]], %[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKPARALLEL: %[[dim9:.*]] = dim %[[arg1]], %[[c1]] : memref // CHECKPARALLEL: %[[dim10:.*]] = dim %[[arg1]], %[[c2]] : memref