diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -114,7 +114,7 @@ /// `createAndFold` builder method. If `folder` is null, the regular `create` /// method is called. SmallVector applyMapToValues(OpBuilder &b, Location loc, - AffineMap map, ArrayRef values, + AffineMap map, ValueRange values, OperationFolder *folder = nullptr); /// Returns all the operands of `linalgOp` that are not views. diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -295,8 +295,7 @@ } auto concatMap = concatAffineMaps(indexingMaps); - auto aggregateMap = inversePermutation(concatMap); - if (!aggregateMap) + if (!concatMap.getNumSymbols() && !inversePermutation(concatMap)) return op.emitOpError("expected the concatenation of maps in indexing_map " "to be invertible"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -76,6 +76,47 @@ return res; } +/// Creates a number of ranges equal to the number of dimensions in the `map`. +/// The function supports for now only limited number of expressions inside +/// map results. It expects a non-inverted, concatenated map and last values in +/// allViewSizes will be applied to the symbols in the map. +static SmallVector +emitLoopRangesWithSymbols(OpBuilder &b, Location loc, AffineMap map, + ValueRange allViewSizes) { + assert(allViewSizes.size() == map.getNumInputs() && + "Number of provided values must match number of inputs to the map."); + + SmallVector res(map.getNumDims()); + for (auto result : map.getResults()) { + if (auto d = result.dyn_cast()) { + if (res[d.getPosition()].offset) + continue; + res[d.getPosition()] = + SubViewOp::Range{std_constant_index(0), allViewSizes[d.getPosition()], + std_constant_index(1)}; + } + + if (auto binOp = result.dyn_cast()) { + auto lhs = binOp.getLHS().dyn_cast(); + auto rhs = binOp.getRHS().dyn_cast(); + if (!lhs || !rhs) + continue; + + auto m = lhs.getLHS().dyn_cast(); + if (!m) + continue; + + int mPos = m.getPosition(); + AffineMap fromMap = + AffineMap::get(map.getNumDims(), map.getNumSymbols(), rhs.getLHS()); + auto from = applyMapToValues(b, loc, fromMap, allViewSizes).front(); + auto to = b.create(loc, allViewSizes[mPos], from); + res[mPos] = SubViewOp::Range{from, to, std_constant_index(1)}; + } + } + return res; +} + template static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, ArrayRef> indexing, @@ -467,18 +508,38 @@ linalgOp.indexing_maps().template getAsRange(); auto maps = llvm::to_vector<8>( llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); })); - AffineMap invertedMap = inversePermutation(concatAffineMaps(maps)); - if (!invertedMap) - return {}; - if (invertedMap.isEmpty()) { - emitScalarImplementation({}, linalgOp); - return LinalgLoops(); - } + auto map = concatAffineMaps(maps); + SmallVector loopRanges; + + auto attr = linalgOp.template getAttrOfType("symbol_source"); + if (attr) { + // This map has symbols and thus is not a permutation. Therefore we + // cannot invert it. + unsigned symbolSource = attr.getInt(); + auto sizes = getViewSizes(builder, linalgOp); + unsigned numIn = map.getNumInputs(), numDims = map.getNumDims(); + unsigned diff = numIn - numDims; + + // Append or rewrite the end of the value list that corresponds to the + // symbols. They are in this case dims of the "symbol_source" operand. + sizes.resize(numIn); + for (unsigned idx = 0; idx < diff; idx++) + sizes[numDims + idx] = sizes[diff * symbolSource + idx]; + loopRanges = emitLoopRangesWithSymbols(scope.getBuilderRef(), + scope.getLocation(), map, sizes); + } else { + AffineMap invertedMap = inversePermutation(map); + if (!invertedMap) + return {}; + if (invertedMap.isEmpty()) { + emitScalarImplementation({}, linalgOp); + return LinalgLoops(); + } + loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), + invertedMap, getViewSizes(builder, linalgOp)); + } SmallVector allIvs; - auto loopRanges = - emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), invertedMap, - getViewSizes(builder, linalgOp)); GenerateLoopNest::doit( loopRanges, linalgOp.iterator_types().getValue(), [&](ValueRange ivs) { allIvs.append(ivs.begin(), ivs.end()); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -57,7 +57,7 @@ static Value emitOrFoldComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef operandsRef, + ValueRange operandsRef, OperationFolder *folder) { SmallVector operands(operandsRef.begin(), operandsRef.end()); fullyComposeAffineMapAndOperands(&map, &operands); @@ -68,16 +68,16 @@ SmallVector mlir::linalg::applyMapToValues(OpBuilder &b, Location loc, AffineMap map, - ArrayRef values, + ValueRange values, OperationFolder *folder) { SmallVector res; res.reserve(map.getNumResults()); - unsigned numDims = map.getNumDims(); + unsigned numDims = map.getNumDims(), numSym = map.getNumSymbols(); // For each `expr` in `map`, applies the `expr` to the values extracted from // ranges. If the resulting application can be folded into a Value, the // folding occurs eagerly. Otherwise, an affine.apply operation is emitted. for (auto expr : map.getResults()) { - AffineMap map = AffineMap::get(numDims, 0, expr); + AffineMap map = AffineMap::get(numDims, numSym, expr); res.push_back(emitOrFoldComposedAffineApply(b, loc, map, values, folder)); } return res; diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -383,7 +383,8 @@ AffineMap mlir::inversePermutation(AffineMap map) { if (map.isEmpty()) return map; - // assert(map.getNumSymbols() == 0 && "expected map without symbols"); + assert(map.getNumSymbols() == 0 && "expected map without symbols"); + SmallVector exprs(map.getNumDims()); for (auto en : llvm::enumerate(map.getResults())) { auto expr = en.value(); @@ -399,7 +400,7 @@ for (auto expr : exprs) if (expr) seenExprs.push_back(expr); - if (seenExprs.size() != map.getNumDims()) + if (seenExprs.size() != map.getNumInputs()) return AffineMap(); return AffineMap::get(map.getNumResults(), 0, seenExprs, map.getContext()); } diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -14,6 +14,7 @@ // CHECKLOOP-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKLOOP-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKLOOP-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKLOOP-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKLOOP-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> @@ -26,6 +27,7 @@ // CHECKPARALLEL-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKPARALLEL-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKPARALLEL-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKPARALLEL-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> @@ -946,10 +948,14 @@ // CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg2]], %c0 : memref -// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { +// CHECKLOOP: %c1 = constant 1 : index +// CHECKLOOP: %c0 = constant 0 : index +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %c0 : memref +// CHECKLOOP: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKLOOP: %[[sizeMinusHalf:.*]] = subi %[[dim0]], %[[half]] : index +// CHECKLOOP: scf.for %[[b:.*]] = %[[half]] to %[[sizeMinusHalf]] step %{{.*}} { +// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { // CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %c0 : memref // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[aff]]] : memref @@ -963,9 +969,13 @@ // CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %c0 : memref -// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%c0, %c0) to (%[[dim1]], %[[dim0]]) step ({{.*}}) { +// CHECKPARALLEL: %c1 = constant 1 : index +// CHECKPARALLEL: %c0 = constant 0 : index +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %c0 : memref +// CHECKPARALLEL: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKPARALLEL: %[[sizeMinusHalf:.*]] = subi %[[dim0]], %[[half]] : index +// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%[[half]], %c0) to (%[[sizeMinusHalf]], %[[dim1]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %c0 : memref // CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[aff]]] : memref @@ -1008,14 +1018,18 @@ // CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %c1 : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg2]], %c0 : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %c1 : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %c1 : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %c0 : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %c1 : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { // CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %c0 : memref // CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %c1 : memref // CHECKLOOP: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1031,11 +1045,15 @@ // CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %c1 : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %c0 : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %c1 : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%c0, %c0, %c0, %c0) to (%[[dim2]], %[[dim3]], %[[dim0]], %[[dim1]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %c1 : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %c0 : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %c1 : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%[[half1]], %[[half2]], %c0, %c0) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[dim2]], %[[dim3]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %c0 : memref // CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %c1 : memref // CHECKPARALLEL: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1080,18 +1098,24 @@ // CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %c1 : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %c2 : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %c0 : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %c1 : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %c2 : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %c1 : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %c2 : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %c0 : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %c1 : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %c2 : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to %[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { // CHECKLOOP: %[[dim6:.*]] = dim %[[arg0]], %c0 : memref // CHECKLOOP: %[[dim7:.*]] = dim %[[arg0]], %c1 : memref // CHECKLOOP: %[[dim8:.*]] = dim %[[arg0]], %c2 : memref @@ -1109,13 +1133,19 @@ // CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %c1 : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %c2 : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %c0 : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %c1 : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %c2 : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%c0, %c0, %c0, %c0, %c0, %c0) to (%[[dim3]], %[[dim4]], %[[dim5]], %[[dim0]], %[[dim1]], %[[dim2]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %c1 : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %c2 : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %c0 : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %c1 : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %c2 : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %c0, %c0, %c0) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[dim3]], %[[dim4]], %[[dim5]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg0]], %c0 : memref // CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg0]], %c1 : memref // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg0]], %c2 : memref @@ -1162,22 +1192,30 @@ // CHECKLOOP-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %c1 : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %c2 : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %c3 : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %c0 : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %c1 : memref -// CHECKLOOP: %[[dim6:.*]] = dim %[[arg2]], %c2 : memref -// CHECKLOOP: %[[dim7:.*]] = dim %[[arg2]], %c3 : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %c1 : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %c2 : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %c3 : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %c0 : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %c1 : memref +// CHECKLOOP: %[[dim6:.*]] = dim %[[arg0]], %c2 : memref +// CHECKLOOP: %[[dim7:.*]] = dim %[[arg0]], %c3 : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKLOOP: %[[half4:.*]] = affine.apply #[[$convHalf]]()[%[[dim7]]] +// CHECKLOOP: %[[sizeMinusHalf4:.*]] = subi %[[dim3]], %[[half4]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to %[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %[[half4]] to %[[sizeMinusHalf4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { // CHECKLOOP: %[[dim8:.*]] = dim %[[arg0]], %c0 : memref // CHECKLOOP: %[[dim9:.*]] = dim %[[arg0]], %c1 : memref // CHECKLOOP: %[[dim10:.*]] = dim %[[arg0]], %c2 : memref @@ -1197,15 +1235,23 @@ // CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %c0 : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %c1 : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %c2 : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %c3 : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %c0 : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %c1 : memref -// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg2]], %c2 : memref -// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg2]], %c3 : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%c0, %c0, %c0, %c0, %c0, %c0, %c0, %c0) to (%[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]], %[[dim0]], %[[dim1]], %[[dim2]], %[[dim3]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %c0 : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %c1 : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %c2 : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %c3 : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %c0 : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %c1 : memref +// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg0]], %c2 : memref +// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg0]], %c3 : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKPARALLEL: %[[half4:.*]] = affine.apply #[[$convHalf]]()[%[[dim7]]] +// CHECKPARALLEL: %[[sizeMinusHalf4:.*]] = subi %[[dim3]], %[[half4]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %[[half4]], %c0, %c0, %c0, %c0) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[sizeMinusHalf4]], %[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg0]], %c0 : memref // CHECKPARALLEL: %[[dim9:.*]] = dim %[[arg0]], %c1 : memref // CHECKPARALLEL: %[[dim10:.*]] = dim %[[arg0]], %c2 : memref