diff --git a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
@@ -14,10 +14,12 @@
 #ifndef MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
 #define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
 
+#include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 
 namespace mlir {
 class AffineApplyOp;
+class AffineMap;
 class Location;
 class OpBuilder;
 class OpFoldResult;
@@ -71,6 +73,11 @@
     function_ref<bool(Value, std::optional<int64_t>)> stopCondition,
     bool closedUB = false);
 
+/// Reify an already computed bound with Affine dialect ops.
+OpFoldResult
+reifyValueBound(OpBuilder &b, Location loc, AffineMap boundMap,
+                ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands);
+
 } // namespace mlir
 
 #endif // MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
diff --git a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
--- a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
@@ -0,0 +1,30 @@
+//===- TensorTransformOps.h - Tensor transformation ops ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
+#define MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
+
+#include "mlir/Dialect/PDL/IR/PDLTypes.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+#include "mlir/Dialect/Transform/IR/TransformTypes.h"
+#include "mlir/IR/OpImplementation.h"
+
+namespace mlir {
+class DialectRegistry;
+
+namespace tensor {
+class PadOp;
+
+void registerTransformDialectExtension(DialectRegistry &registry);
+} // namespace tensor
+} // namespace mlir
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
+
+#endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -0,0 +1,64 @@
+//===- TensorTransformOps.td - Tensor transformation ops ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TENSOR_TRANSFORM_OPS
+#define TENSOR_TRANSFORM_OPS
+
+include "mlir/Dialect/PDL/IR/PDLTypes.td"
+include "mlir/Dialect/Transform/IR/TransformDialect.td"
+include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
+include "mlir/Dialect/Transform/IR/TransformTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpBase.td"
+
+def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
+
+def MakeLoopIndependentOp
+    : Op<Transform_Dialect, "tensor.make_loop_independent",
+         [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+          TransformOpInterface, TransformEachOpTrait]> {
+  let description = [{
+    Rewrite the targeted ops such that their index-typed operands no longer
+    depend on any loop induction variable of the `num_loops` enclosing
+    `scf.for` loops. I.e., compute an upper bound that is independent of any
+    such loop IV for every tensor dimension. The transformed op could then be
+    hoisted from the `num_loops` enclosing loops. To preserve the original
+    semantics, place a `tensor.extract_slice` inside the loop.
+
+    Currently supported operations are:
+    - tensor.empty: Replaced with a new tensor.empty with upper bound sizes,
+      followed by a tensor.extract_slice.
+    - tensor.pad: Replaced by an upper bound padding, followed by a
+      tensor.extract_slice.
+
+    #### Return modes
+
+    This operation fails if at least one induction variable could not be
+    eliminated. In case the targeted op is already independent of induction
+    variables, this transform succeeds and returns the unmodified target op.
+
+    Otherwise, the returned handle points to a subset of the produced ops:
+    - tensor.empty: The returned handle points to the tensor.extract_slice op.
+    - tensor.pad: The returned handle points to the tensor.extract_slice op.
+
+    This transform op consumes the target handle and produces a result handle.
+  }];
+
+  let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
+  let results = (outs PDL_Operation:$transformed);
+  let assemblyFormat = "$target attr-dict";
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure applyToOne(
+        ::mlir::Operation *target,
+        ::mlir::transform::ApplyToEachResultList &results,
+        ::mlir::transform::TransformState &state);
+  }];
+}
+
+#endif // TENSOR_TRANSFORM_OPS
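NOTE (illustration, not part of the patch): a transform script driving the new
op looks as follows; the matcher and the handle types mirror the tests added at
the end of this patch.

  transform.sequence failures(propagate) {
  ^bb1(%arg1: !pdl.operation):
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
        : (!pdl.operation) -> !pdl.operation
    %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
  }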
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -18,6 +18,41 @@
 
 namespace tensor {
 
+/// Build a new tensor::PadOp with low/high padding that is independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same PadOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for the low/high padding
+/// could be found.
+///
+/// Example:
+/// scf.for %iv = %lb to %ub step %step {
+///   %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
+///   %p = tensor.pad %t low[5] high[%high] ...
+///   ...
+/// }
+///
+/// The function builds IR such as:
+/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
+/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
+/// %dim = tensor.dim %t, %c0
+/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+///     (%iv)[%ub, %dim]
+/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
+///
+/// The slice is returned.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                    ValueRange independencies);
+
+/// Build a new tensor::EmptyOp whose dynamic sizes are independent of all
+/// given independencies. If the op is already independent of all
+/// independencies, the same EmptyOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for the dynamic sizes could
+/// be found.
+FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::EmptyOp emptyOp,
+                                    ValueRange independencies);
+
 /// Populates `patterns` with patterns to wrap a tensor.pad op with an scf.if op
 /// to separate the cases where we don't need padding (all pad sizes are
 /// actually zeros) and where we indeed need padding.
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -70,6 +70,7 @@
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/ValueBoundsOpInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
 #include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -130,6 +131,7 @@
   linalg::registerTransformDialectExtension(registry);
   memref::registerTransformDialectExtension(registry);
   scf::registerTransformDialectExtension(registry);
+  tensor::registerTransformDialectExtension(registry);
   vector::registerTransformDialectExtension(registry);
 
   // Register all external models.
diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
--- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
+++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
@@ -72,6 +72,14 @@
                ValueDimList dependencies, bool closedUB = false);
 
+  /// Compute a bound that is independent of all values in `independencies`.
+  static LogicalResult
+  computeIndependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
+                          presburger::BoundType type, Value value,
+                          std::optional<int64_t> dim,
+                          ValueRange independencies, bool closedUB = false);
+
   /// Compute a constant bound for the given index-typed value or shape
   /// dimension size.
   ///
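For reference, the two new entry points compose as follows (a minimal sketch,
assuming an OpBuilder `b` positioned where the bound should be materialized;
`value` and `ivs` stand in for the quantity to bound and the values to become
independent of):

  AffineMap boundMap;
  ValueDimList mapOperands;
  if (failed(ValueBoundsConstraintSet::computeIndependentBound(
          boundMap, mapOperands, presburger::BoundType::UB, value,
          /*dim=*/std::nullopt, /*independencies=*/ivs, /*closedUB=*/true)))
    return failure();
  OpFoldResult ub = reifyValueBound(b, loc, boundMap, mapOperands);

This is exactly the pattern used by `makeIndependent` in
IndependenceTransforms.cpp further down in this patch.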
diff --git a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp
@@ -37,6 +37,13 @@
           boundMap, mapOperands, type, value, dim, stopCondition, closedUB)))
     return failure();
 
+  // Reify bound.
+  return reifyValueBound(b, loc, boundMap, mapOperands);
+}
+
+OpFoldResult mlir::reifyValueBound(
+    OpBuilder &b, Location loc, AffineMap boundMap,
+    ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands) {
   // Materialize tensor.dim/memref.dim ops.
   SmallVector<Value> operands;
   for (auto valueDim : mapOperands) {
diff --git a/mlir/lib/Dialect/Tensor/CMakeLists.txt b/mlir/lib/Dialect/Tensor/CMakeLists.txt
--- a/mlir/lib/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
 add_subdirectory(Utils)
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_mlir_dialect_library(MLIRTensorTransformOps
+  TensorTransformOps.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
+
+  DEPENDS
+  MLIRTensorTransformOpsIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRAffineDialect
+  MLIRIR
+  MLIRPDLDialect
+  MLIRSCFDialect
+  MLIRTensorTransforms
+  MLIRTransformDialect
+)
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -0,0 +1,94 @@
+//===- TensorTransformOps.cpp - Implementation of tensor transform ops ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Transform/IR/TransformDialect.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// MakeLoopIndependentOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
+    Operation *target, transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  // Gather IVs.
+  SmallVector<Value> ivs;
+  Operation *nextOp = target;
+  for (uint64_t i = 0; i < getNumLoops(); ++i) {
+    nextOp = nextOp->getParentOfType<scf::ForOp>();
+    if (!nextOp) {
+      DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                         << "could not find " << i
+                                         << "-th enclosing loop";
+      diag.attachNote(target->getLoc()) << "target op";
+      return diag;
+    }
+    ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
+  }
+
+  // Rewrite IR.
+  IRRewriter rewriter(target->getContext());
+  FailureOr<Value> replacement = failure();
+  if (auto padOp = dyn_cast<tensor::PadOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, padOp, ivs);
+  } else if (auto emptyOp = dyn_cast<tensor::EmptyOp>(target)) {
+    replacement = tensor::buildIndependentOp(rewriter, emptyOp, ivs);
+  } else {
+    DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                       << "unsupported target op";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  if (failed(replacement)) {
+    DiagnosedSilenceableFailure diag =
+        emitSilenceableError() << "could not make target op loop-independent";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  rewriter.replaceOp(target, *replacement);
+  results.push_back(replacement->getDefiningOp());
+  return DiagnosedSilenceableFailure::success();
+}
+
+//===----------------------------------------------------------------------===//
+// Transform op registration
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TensorTransformDialectExtension
+    : public transform::TransformDialectExtension<
+          TensorTransformDialectExtension> {
+public:
+  using Base::Base;
+
+  void init() {
+    declareGeneratedDialect<AffineDialect>();
+    declareGeneratedDialect<tensor::TensorDialect>();
+
+    registerTransformOps<
+#define GET_OP_LIST
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+        >();
+  }
+};
+} // namespace
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+
+void mlir::tensor::registerTransformDialectExtension(
+    DialectRegistry &registry) {
+  registry.addExtensions<TensorTransformDialectExtension>();
+}
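Downstream tools that do not go through registerAllDialects can opt in to the
extension explicitly (a sketch; `registry` is whatever DialectRegistry the tool
already populates, mirroring the InitAllDialects.h change above):

  mlir::DialectRegistry registry;
  mlir::tensor::registerTransformDialectExtension(registry);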
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -17,6 +17,7 @@
 
   LINK_LIBS PUBLIC
   MLIRAffineDialect
+  MLIRAffineTransforms
   MLIRAffineUtils
   MLIRArithDialect
   MLIRBufferizationDialect
@@ -29,4 +30,5 @@
   MLIRTensorDialect
   MLIRTilingInterface
   MLIRTransforms
+  MLIRValueBoundsOpInterface
 )
diff --git a/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
@@ -0,0 +1,136 @@
+//===- IndependenceTransforms.cpp - Make ops independent of values -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Transforms/Transforms.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+/// Make the given OpFoldResult independent of all independencies.
+static FailureOr<OpFoldResult> makeIndependent(OpBuilder &b, Location loc,
+                                               OpFoldResult ofr,
+                                               ValueRange independencies) {
+  if (ofr.is<Attribute>())
+    return ofr;
+  Value value = ofr.get<Value>();
+  AffineMap boundMap;
+  ValueDimList mapOperands;
+  if (failed(ValueBoundsConstraintSet::computeIndependentBound(
+          boundMap, mapOperands, presburger::BoundType::UB, value,
+          /*dim=*/std::nullopt, independencies, /*closedUB=*/true)))
+    return failure();
+  return reifyValueBound(b, loc, boundMap, mapOperands);
+}
+
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(padOp);
+  Location loc = padOp.getLoc();
+
+  // Non-constant padding not supported.
+  Value constantPadding = padOp.getConstantPaddingValue();
+  if (!constantPadding)
+    return failure();
+
+  SmallVector<OpFoldResult> newMixedLow, newMixedHigh;
+  for (OpFoldResult ofr : padOp.getMixedLowPad()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedLow.push_back(*ub);
+  }
+  for (OpFoldResult ofr : padOp.getMixedHighPad()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newMixedHigh.push_back(*ub);
+  }
+
+  // Return existing tensor::PadOp if nothing has changed.
+  if (llvm::equal(padOp.getMixedLowPad(), newMixedLow) &&
+      llvm::equal(padOp.getMixedHighPad(), newMixedHigh))
+    return padOp.getResult();
+
+  // Create a new tensor::PadOp.
+  auto newPadOp = b.create<PadOp>(
+      loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
+      constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});
+
+  // Create a tensor::ExtractSliceOp.
+  // Reify the result sizes of the old tensor::PadOp.
+  ReifiedRankedShapedTypeDims reifiedSizes;
+  ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+      dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+  if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
+    return failure();
+  SmallVector<OpFoldResult> offsets, sizes, strides;
+  for (int64_t i = 0; i < padOp.getResultType().getRank(); ++i) {
+    // offset = ub(low_padding) - low_padding
+    OpFoldResult prevLow = padOp.getMixedLowPad()[i];
+    if (prevLow.is<Attribute>()) {
+      offsets.push_back(b.getIndexAttr(0));
+    } else {
+      offsets.push_back(
+          b.create<AffineApplyOp>(
+               loc, b.getAffineDimExpr(0) - b.getAffineDimExpr(1),
+               std::initializer_list<Value>{newMixedLow[i].get<Value>(),
+                                            prevLow.get<Value>()})
+              .getResult());
+    }
+    // size = reified result size
+    if (!padOp.getResultType().isDynamicDim(i)) {
+      sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
+    } else {
+      sizes.push_back(reifiedSizes[0][i]);
+    }
+    // stride = 1
+    strides.push_back(b.getIndexAttr(1));
+  }
+
+  return b.create<ExtractSliceOp>(loc, newPadOp, offsets, sizes, strides)
+      .getResult();
+}
+
+FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b,
+                                            tensor::EmptyOp emptyOp,
+                                            ValueRange independencies) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(emptyOp);
+  Location loc = emptyOp.getLoc();
+
+  SmallVector<OpFoldResult> newSizes;
+  for (OpFoldResult ofr : emptyOp.getMixedSizes()) {
+    auto ub = makeIndependent(b, loc, ofr, independencies);
+    if (failed(ub))
+      return failure();
+    newSizes.push_back(*ub);
+  }
+
+  // Return existing tensor::EmptyOp if nothing has changed.
+  if (llvm::equal(emptyOp.getMixedSizes(), newSizes))
+    return emptyOp.getResult();
+
+  // Create a new tensor::EmptyOp.
+  Value newEmptyOp =
+      b.create<EmptyOp>(loc, newSizes, emptyOp.getType().getElementType());
+
+  // Create a tensor::ExtractSliceOp.
+  SmallVector<OpFoldResult> offsets(newSizes.size(), b.getIndexAttr(0));
+  SmallVector<OpFoldResult> strides(newSizes.size(), b.getIndexAttr(1));
+  return b
+      .create<ExtractSliceOp>(loc, newEmptyOp, offsets,
+                              emptyOp.getMixedSizes(), strides)
+      .getResult();
+}
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -385,6 +385,40 @@
                       closedUB);
 }
 
+LogicalResult ValueBoundsConstraintSet::computeIndependentBound(
+    AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
+    Value value, std::optional<int64_t> dim, ValueRange independencies,
+    bool closedUB) {
+  // Return "true" if the given value is independent of all values in
+  // `independencies`. I.e., neither the value itself nor any value in the
+  // backward slice (reverse use-def chain) is contained in `independencies`.
+  auto isIndependent = [&](Value v) {
+    SmallVector<Value> worklist;
+    DenseSet<Value> visited;
+    worklist.push_back(v);
+    while (!worklist.empty()) {
+      Value next = worklist.pop_back_val();
+      if (visited.contains(next))
+        continue;
+      visited.insert(next);
+      if (llvm::is_contained(independencies, next))
+        return false;
+      // TODO: DominanceInfo could be used to stop the traversal early.
+      Operation *op = next.getDefiningOp();
+      if (!op)
+        continue;
+      worklist.append(op->getOperands().begin(), op->getOperands().end());
+    }
+    return true;
+  };
+
+  // Reify bounds in terms of any independent values.
+  return computeBound(
+      resultMap, mapOperands, type, value, dim,
+      [&](Value v, std::optional<int64_t> d) { return isIndependent(v); },
+      closedUB);
+}
+
 FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
     presburger::BoundType type, Value value, std::optional<int64_t> dim,
     StopConditionFn stopCondition, bool closedUB) {
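As a sanity check of the slice arithmetic in buildIndependentOp above (the
numbers are illustrative, not from the patch): with low = 2, ub(low) = 5,
high = 3, ub(high) = 7, and a source dimension of 10, the hoistable pad
produces 5 + 10 + 7 = 22 elements, whereas the original result had
2 + 10 + 3 = 15; the extract_slice therefore reads 15 elements starting at
offset ub(low) - low = 5 - 2 = 3, which stays within the 22-element bound.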
diff --git a/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
@@ -0,0 +1,151 @@
+// RUN: mlir-opt %s -allow-unregistered-dialect \
+// RUN:     -test-transform-dialect-interpreter -canonicalize \
+// RUN:     -split-input-file -verify-diagnostics | FileCheck %s
+
+// This is a test case where "high" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
+    %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[5] high[%high] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// This is a test case where "low" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
+// CHECK-LABEL: func @make_pad_loop_independent_2(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_2(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
+    %low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[%low] high[5] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
+// CHECK-LABEL: func @two_loops(
+func.func @two_loops(%lb: index, %ub: index, %step: index,
+                     %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      // CHECK: affine.apply #[[$map]]()[%{{.*}}]
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
+}
+
+// -----
+
+func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
+                            %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      // expected-note@below {{target op}}
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  // expected-error@below {{could not find 2-th enclosing loop}}
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
+// CHECK: #[[$map1:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
+// CHECK-LABEL: func @make_empty_loop_independent(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
+func.func @make_empty_loop_independent(%lb: index, %ub: index, %step: index) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[slice_sz:.*]] = affine.apply #[[$map]](%[[iv]])[%[[ub]]]
+    // CHECK: %[[empty_sz:.*]] = affine.apply #[[$map1]]()[%[[ub]], %[[lb]]]
+    // CHECK: %[[empty:.*]] = tensor.empty(%[[empty_sz]]) : tensor<?xf32>
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[empty]][0] [%[[slice_sz]]] [1]
+    %sz = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %empty = tensor.empty(%sz) : tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%empty) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5630,6 +5630,7 @@
     includes = ["include"],
     deps = [
         ":AffineDialect",
+        ":AffineTransforms",
         ":AffineUtils",
         ":ArithDialect",
         ":ArithUtils",
@@ -5646,6 +5647,57 @@
         ":TensorPassIncGen",
         ":TilingInterface",
         ":Transforms",
+        ":ValueBoundsOpInterface",
+        "//llvm:Support",
+    ],
+)
+
+td_library(
+    name = "TensorTransformOpsTdFiles",
+    srcs = [
+        "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    ],
+    includes = ["include"],
+    deps = [
+        ":PDLDialect",
+        ":TransformDialectTdFiles",
+    ],
+)
+
+gentbl_cc_library(
+    name = "TensorTransformOpsIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-decls"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
+        ),
+        (
+            ["-gen-op-defs"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    deps = [
+        ":TensorTransformOpsTdFiles",
+    ],
+)
+
+cc_library(
+    name = "TensorTransformOps",
+    srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
+    hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
+    includes = ["include"],
["include"], + deps = [ + ":AffineDialect", + ":IR", + ":PDLDialect", + ":SCFDialect", + ":TensorDialect", + ":TensorTransformOpsIncGen", + ":TensorTransforms", + ":TransformDialect", "//llvm:Support", ], ) @@ -7081,6 +7133,7 @@ ":TensorDialect", ":TensorInferTypeOpInterfaceImpl", ":TensorTilingInterfaceImpl", + ":TensorTransformOps", ":TensorTransforms", ":TosaDialect", ":TosaToLinalg",