diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -145,7 +145,7 @@ /// `createAndFold` builder method. If `folder` is null, the regular `create` /// method is called. SmallVector applyMapToValues(OpBuilder &b, Location loc, - AffineMap map, ArrayRef values, + AffineMap map, ValueRange values, OperationFolder *folder = nullptr); /// Returns all the operands of `linalgOp` that are not views. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -76,6 +76,52 @@ return res; } +/// Creates a number of ranges equal to the number of dimensions in the `map`. +/// For now only a limited set of expressions is supported in the map results. +/// Expects a non-inverted, concatenated map; the trailing values in +/// `allViewSizes` are applied to the symbols of the map. 
+static SmallVector +emitLoopRangesWithSymbols(OpBuilder &b, Location loc, AffineMap map, + ValueRange allViewSizes) { + assert(allViewSizes.size() == map.getNumInputs() && + "Number of provided values must match number of inputs to the map."); + + SmallVector res(map.getNumDims()); + for (unsigned idx = 0, e = map.getNumResults(); idx < e; ++idx) { + auto result = map.getResult(idx); + if (auto d = result.dyn_cast()) { + if (res[d.getPosition()].offset) + continue; + res[d.getPosition()] = SubViewOp::Range{ + std_constant_index(0), allViewSizes[idx], std_constant_index(1)}; + } + + // Match results shaped like `m + n - s floordiv 2`; skip anything else. + if (auto binOp = result.dyn_cast()) { + auto lhs = binOp.getLHS().dyn_cast(); + auto rhs = binOp.getRHS().dyn_cast(); + if (!lhs || !rhs) + continue; + + auto m = lhs.getLHS().dyn_cast(); + auto n = lhs.getRHS().dyn_cast(); + auto fDiv = rhs.getLHS().dyn_cast(); + if (!m || !n || !fDiv || fDiv.getKind() != AffineExprKind::FloorDiv || + fDiv.getLHS().getKind() != AffineExprKind::SymbolId || + fDiv.getRHS().getKind() != AffineExprKind::Constant) + continue; + + int mPos = m.getPosition(); + AffineMap fromMap = + AffineMap::get(map.getNumDims(), map.getNumSymbols(), fDiv); + Value from = applyMapToValues(b, loc, fromMap, allViewSizes).front(); + auto to = b.create(loc, allViewSizes[mPos], from); + res[mPos] = SubViewOp::Range{from, to, std_constant_index(1)}; + } + } + return res; +} + template static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, ArrayRef> indexing, @@ -466,31 +512,24 @@ SmallVector maps = getIndexingMaps(linalgOp); SmallVector sizes = getViewSizes(builder, linalgOp); AffineMap map = concatAffineMaps(maps); - if (map.getNumSymbols()) { - // Ignore symbols for now as they are not supported by inversePermutation. 
- unsigned dims = map.getNumDims(); - SmallVector res; - for (auto result : map.getResults()) - if (auto d = result.dyn_cast()) - res.push_back(d); - - map = AffineMap::get(dims, 0, res, map.getContext()); + SmallVector loopRanges; - // Cut off values that would have been applied to symbols - sizes.resize(res.size()); - } + if (map.getNumSymbols()) { + loopRanges = emitLoopRangesWithSymbols(scope.getBuilderRef(), + scope.getLocation(), map, sizes); + } else { + AffineMap invertedMap = inversePermutation(map); + if (!invertedMap) + return {}; + if (invertedMap.isEmpty()) { + emitScalarImplementation({}, linalgOp); + return LinalgLoops(); + } - AffineMap invertedMap = inversePermutation(map); - if (!invertedMap) - return {}; - if (invertedMap.isEmpty()) { - emitScalarImplementation({}, linalgOp); - return LinalgLoops(); + loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), + invertedMap, sizes); } - SmallVector allIvs; - auto loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), - invertedMap, sizes); GenerateLoopNest::doit( loopRanges, linalgOp.iterator_types().getValue(), [&](ValueRange ivs) { allIvs.append(ivs.begin(), ivs.end()); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -57,7 +57,7 @@ static Value emitOrFoldComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, - ArrayRef operandsRef, + ValueRange operandsRef, OperationFolder *folder) { SmallVector operands(operandsRef.begin(), operandsRef.end()); fullyComposeAffineMapAndOperands(&map, &operands); @@ -68,16 +68,16 @@ SmallVector mlir::linalg::applyMapToValues(OpBuilder &b, Location loc, AffineMap map, - ArrayRef values, + ValueRange values, OperationFolder *folder) { SmallVector res; res.reserve(map.getNumResults()); - unsigned numDims = map.getNumDims(); + unsigned numDims = map.getNumDims(), numSym = 
map.getNumSymbols(); // For each `expr` in `map`, applies the `expr` to the values extracted from // ranges. If the resulting application can be folded into a Value, the // folding occurs eagerly. Otherwise, an affine.apply operation is emitted. for (auto expr : map.getResults()) { - AffineMap map = AffineMap::get(numDims, 0, expr); + AffineMap map = AffineMap::get(numDims, numSym, expr); res.push_back(emitOrFoldComposedAffineApply(b, loc, map, values, folder)); } return res; diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -14,6 +14,7 @@ // CHECKLOOP-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKLOOP-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKLOOP-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKLOOP-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKLOOP-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> @@ -26,6 +27,7 @@ // CHECKPARALLEL-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> // CHECKPARALLEL-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> // CHECKPARALLEL-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> +// CHECKPARALLEL-DAG: #[[$convHalf:.*]] = affine_map<()[s0] -> (s0 floordiv 2)> // CHECKPARALLEL-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)> @@ -947,10 +949,12 @@ // CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: %[[c0:.*]] = constant 0 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[dim1]] step 
%{{.*}} { -// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKLOOP: %[[sizeMinusHalf:.*]] = subi %[[dim0]], %[[half]] : index +// CHECKLOOP: scf.for %[[b:.*]] = %[[half]] to %[[sizeMinusHalf]] step %{{.*}} { +// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { // CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[aff]]] : memref @@ -965,9 +969,11 @@ // CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%{{.*}}, %{{.*}}) to (%[[dim1]], %[[dim0]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[half:.*]] = affine.apply #[[$convHalf]]()[%[[dim1]]] +// CHECKPARALLEL: %[[sizeMinusHalf:.*]] = subi %[[dim0]], %[[half]] : index +// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%[[half]], %{{.*}}) to (%[[sizeMinusHalf]], %[[dim1]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]] // CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[aff]]] : memref @@ -1012,14 +1018,18 @@ // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// 
CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { // CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKLOOP: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1037,11 +1047,15 @@ // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: 
scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim2]], %[[dim3]], %[[dim0]], %[[dim1]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim2]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%[[half1]], %[[half2]], %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[dim2]], %[[dim3]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKPARALLEL: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]] @@ -1089,18 +1103,24 @@ // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index // CHECKLOOP: %[[c2:.*]] = constant 2 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// 
CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to %[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { // CHECKLOOP: %[[dim6:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[dim7:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKLOOP: %[[dim8:.*]] = dim %[[arg0]], %[[c2]] : memref @@ -1121,13 +1141,19 @@ // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[c2:.*]] = constant 2 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKPARALLEL: 
%[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim3]], %[[dim4]], %[[dim5]], %[[dim0]], %[[dim1]], %[[dim2]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim3]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %{{.*}}, %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[dim3]], %[[dim4]], %[[dim5]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg0]], %[[c2]] : memref @@ -1178,22 +1204,30 @@ // CHECKLOOP: %[[c1:.*]] = constant 1 : index // CHECKLOOP: %[[c2:.*]] = constant 2 : index // CHECKLOOP: %[[c3:.*]] = constant 3 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// 
CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKLOOP: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref -// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} { -// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} { +// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref +// CHECKLOOP: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKLOOP: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKLOOP: %[[dim6:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKLOOP: %[[dim7:.*]] = dim %[[arg0]], %[[c3]] : memref +// CHECKLOOP: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKLOOP: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKLOOP: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKLOOP: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKLOOP: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKLOOP: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKLOOP: %[[half4:.*]] = affine.apply 
#[[$convHalf]]()[%[[dim7]]] +// CHECKLOOP: %[[sizeMinusHalf4:.*]] = subi %[[dim3]], %[[half4]] : index +// CHECKLOOP: scf.for %[[i0:.*]] = %[[half1]] to %[[sizeMinusHalf1]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i1:.*]] = %[[half2]] to %[[sizeMinusHalf2]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i2:.*]] = %[[half3]] to %[[sizeMinusHalf3]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i3:.*]] = %[[half4]] to %[[sizeMinusHalf4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} { +// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} { // CHECKLOOP: %[[dim8:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKLOOP: %[[dim9:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKLOOP: %[[dim10:.*]] = dim %[[arg0]], %[[c2]] : memref @@ -1217,15 +1251,23 @@ // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[c2:.*]] = constant 2 : index // CHECKPARALLEL: %[[c3:.*]] = constant 3 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref -// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]], %[[dim0]], %[[dim1]], %[[dim2]], %[[dim3]]) step ({{.*}}) { +// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref +// 
CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref +// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg0]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg0]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg0]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg0]], %[[c3]] : memref +// CHECKPARALLEL: %[[half1:.*]] = affine.apply #[[$convHalf]]()[%[[dim4]]] +// CHECKPARALLEL: %[[sizeMinusHalf1:.*]] = subi %[[dim0]], %[[half1]] : index +// CHECKPARALLEL: %[[half2:.*]] = affine.apply #[[$convHalf]]()[%[[dim5]]] +// CHECKPARALLEL: %[[sizeMinusHalf2:.*]] = subi %[[dim1]], %[[half2]] : index +// CHECKPARALLEL: %[[half3:.*]] = affine.apply #[[$convHalf]]()[%[[dim6]]] +// CHECKPARALLEL: %[[sizeMinusHalf3:.*]] = subi %[[dim2]], %[[half3]] : index +// CHECKPARALLEL: %[[half4:.*]] = affine.apply #[[$convHalf]]()[%[[dim7]]] +// CHECKPARALLEL: %[[sizeMinusHalf4:.*]] = subi %[[dim3]], %[[half4]] : index +// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%[[half1]], %[[half2]], %[[half3]], %[[half4]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[sizeMinusHalf1]], %[[sizeMinusHalf2]], %[[sizeMinusHalf3]], %[[sizeMinusHalf4]], %[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]]) step ({{.*}}) { // CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg0]], %[[c0]] : memref // CHECKPARALLEL: %[[dim9:.*]] = dim %[[arg0]], %[[c1]] : memref // CHECKPARALLEL: %[[dim10:.*]] = dim %[[arg0]], %[[c2]] : memref