diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -114,7 +114,7 @@ /// `createAndFold` builder method. If `folder` is null, the regular `create` /// method is called. SmallVector applyMapToValues(OpBuilder &b, Location loc, - AffineMap map, ArrayRef values, + AffineMap map, ValueRange values, OperationFolder *folder = nullptr); /// Returns all the operands of `linalgOp` that are not views. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -76,6 +76,47 @@ return res; } +/// Creates a number of ranges equal to the number of dimensions in the `map`. +/// For now the function supports only a limited number of expressions inside +/// map results. It expects a non-inverted, concatenated map; the last values +/// in `allViewSizes` will be applied to the symbols in the map. 
+static SmallVector +emitLoopRangesWithSymbols(OpBuilder &b, Location loc, AffineMap map, + ValueRange allViewSizes) { + assert(allViewSizes.size() == map.getNumInputs() && + "Number of provided values must match number of inputs to the map."); + + SmallVector res(map.getNumDims()); + for (auto result : map.getResults()) { + if (auto d = result.dyn_cast()) { + if (res[d.getPosition()].offset) + continue; + res[d.getPosition()] = + SubViewOp::Range{std_constant_index(0), allViewSizes[d.getPosition()], + std_constant_index(1)}; + } + + if (auto binOp = result.dyn_cast()) { + auto lhs = binOp.getLHS().dyn_cast(); + auto rhs = binOp.getRHS().dyn_cast(); + if (!lhs || !rhs) + continue; + + auto m = lhs.getLHS().dyn_cast(); + if (!m) + continue; + + int mPos = m.getPosition(); + AffineMap fromMap = + AffineMap::get(map.getNumDims(), map.getNumSymbols(), rhs.getLHS()); + auto from = applyMapToValues(b, loc, fromMap, allViewSizes).front(); + auto to = b.create(loc, allViewSizes[mPos], from); + res[mPos] = SubViewOp::Range{from, to, std_constant_index(1)}; + } + } + return res; +} + template static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, ArrayRef> indexing, @@ -467,21 +508,38 @@ linalgOp.indexing_maps().template getAsRange(); auto maps = llvm::to_vector<8>( llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); })); - AffineMap map = concatAffineMaps(maps); - // ignore symbols for now as they are not supported - AffineMap invertedMap = inversePermutation( - AffineMap::get(map.getNumDims(), 0, map.getResults(), map.getContext())); - if (!invertedMap) - return {}; - if (invertedMap.isEmpty()) { - emitScalarImplementation({}, linalgOp); - return LinalgLoops(); - } + auto map = concatAffineMaps(maps); + SmallVector loopRanges; + + auto attr = linalgOp.template getAttrOfType("symbol_source"); + if (attr) { + // This map has symbols and thus is not a permutation. Therefore we + // cannot invert it. 
+ unsigned symbolSource = attr.getInt(); + auto sizes = getViewSizes(builder, linalgOp); + unsigned numIn = map.getNumInputs(), numDims = map.getNumDims(); + unsigned diff = numIn - numDims; + + // Append or rewrite the end of the value list that corresponds to the + // symbols. They are in this case dims of the "symbol_source" operand. + sizes.resize(numIn); + for (unsigned idx = 0; idx < diff; idx++) + sizes[numDims + idx] = sizes[diff * symbolSource + idx]; + loopRanges = emitLoopRangesWithSymbols(scope.getBuilderRef(), + scope.getLocation(), map, sizes); + } else { + AffineMap invertedMap = inversePermutation(map); + if (!invertedMap) + return {}; + if (invertedMap.isEmpty()) { + emitScalarImplementation({}, linalgOp); + return LinalgLoops(); + } + loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), + invertedMap, getViewSizes(builder, linalgOp)); + } SmallVector allIvs; - auto loopRanges = - emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), invertedMap, - getViewSizes(builder, linalgOp)); GenerateLoopNest::doit( loopRanges, linalgOp.iterator_types().getValue(), [&](ValueRange ivs) { allIvs.append(ivs.begin(), ivs.end()); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -57,7 +57,7 @@ static Value emitOrFoldComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef operandsRef, + ValueRange operandsRef, OperationFolder *folder) { SmallVector operands(operandsRef.begin(), operandsRef.end()); fullyComposeAffineMapAndOperands(&map, &operands); @@ -68,16 +68,16 @@ SmallVector mlir::linalg::applyMapToValues(OpBuilder &b, Location loc, AffineMap map, - ArrayRef values, + ValueRange values, OperationFolder *folder) { SmallVector res; res.reserve(map.getNumResults()); - unsigned numDims = map.getNumDims(); + unsigned numDims = map.getNumDims(), numSym = map.getNumSymbols(); // For each 
`expr` in `map`, applies the `expr` to the values extracted from // ranges. If the resulting application can be folded into a Value, the // folding occurs eagerly. Otherwise, an affine.apply operation is emitted. for (auto expr : map.getResults()) { - AffineMap map = AffineMap::get(numDims, 0, expr); + AffineMap map = AffineMap::get(numDims, numSym, expr); res.push_back(emitOrFoldComposedAffineApply(b, loc, map, values, folder)); } return res; diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -14,6 +14,7 @@ // CHECKLOOP-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKLOOP-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKLOOP-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKLOOP-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKLOOP-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> @@ -26,6 +27,7 @@ // CHECKPARALLEL-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKPARALLEL-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKPARALLEL-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKPARALLEL-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> @@ -947,10 +949,12 @@ // CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: %[[c0:.*]] = constant 0 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for 
%[[m:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKLOOP: %[[sizeMinusHalf:.*]] = subi %[[dim0]], %[[half]] : index +// CHECKLOOP: scf.for %[[b:.*]] = %[[half]] to %[[sizeMinusHalf]] step %{{.*}} { +// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { // CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[aff]]] : memref @@ -965,9 +969,11 @@ // CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%{{.*}}, %{{.*}}) to (%[[dim1]], %[[dim0]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKPARALLEL: %[[sizeMinusHalf:.*]] = subi %[[dim0]], %[[half]] : index +// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%[[half]], %{{.*}}) to (%[[sizeMinusHalf]], %[[dim1]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[aff]]] : memref @@ -1012,14 +1018,18 @@ // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim 
%[[arg0]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { // CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKLOOP: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1037,11 +1047,15 @@ // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], 
%[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim2]], %[[dim3]], %[[dim0]], %[[dim1]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%[[half1]], %[[half2]], %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[dim2]], %[[dim3]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKPARALLEL: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1089,18 +1103,24 @@ // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index // CHECKLOOP: %[[c2:.*]] = constant 2 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i4:.*]] = 
%{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to %[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { // CHECKLOOP: %[[dim6:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[dim7:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKLOOP: %[[dim8:.*]] = dim %[[arg0]], %[[c2]] : memref @@ -1121,13 +1141,19 @@ // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[c2:.*]] = constant 2 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], 
%[[c0]] : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim3]], %[[dim4]], %[[dim5]], %[[dim0]], %[[dim1]], %[[dim2]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %{{.*}}, %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[dim3]], %[[dim4]], %[[dim5]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg0]], %[[c2]] : memref @@ -1178,22 +1204,30 @@ // CHECKLOOP: %[[c1:.*]] = constant 1 : index // CHECKLOOP: %[[c2:.*]] = constant 2 : index // CHECKLOOP: %[[c3:.*]] = constant 3 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim 
%[[arg0]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKLOOP: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[dim6:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKLOOP: %[[dim7:.*]] = dim %[[arg0]], %[[c3]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKLOOP: %[[half4:.*]] = affine.apply #[[$convHalf]]()[%[[dim7]]] +// CHECKLOOP: 
%[[sizeMinusHalf4:.*]] = subi %[[dim3]], %[[half4]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to %[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %[[half4]] to %[[sizeMinusHalf4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { // CHECKLOOP: %[[dim8:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[dim9:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKLOOP: %[[dim10:.*]] = dim %[[arg0]], %[[c2]] : memref @@ -1217,15 +1251,23 @@ // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[c2:.*]] = constant 2 : index // CHECKPARALLEL: %[[c3:.*]] = constant 3 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]], %[[dim0]], %[[dim1]], %[[dim2]], %[[dim3]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim 
%[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg0]], %[[c3]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKPARALLEL: %[[half4:.*]] = affine.apply #[[$convHalf]]()[%[[dim7]]] +// CHECKPARALLEL: %[[sizeMinusHalf4:.*]] = subi %[[dim3]], %[[half4]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %[[half4]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[sizeMinusHalf4]], %[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[dim9:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKPARALLEL: %[[dim10:.*]] = dim %[[arg0]], %[[c2]] : memref