diff --git a/mlir/include/mlir/Dialect/Linalg/Analysis/ConstraintsSet.h b/mlir/include/mlir/Dialect/Linalg/Analysis/ConstraintsSet.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Analysis/ConstraintsSet.h @@ -0,0 +1,67 @@ +//===- ConstraintsSet.h - Extensions for FlatAffineConstraints --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Linalg-specific constraints set extensions. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_LINALG_ANALYSIS_CONSTRAINTS_SET +#define MLIR_LINALG_ANALYSIS_CONSTRAINTS_SET + +#include "mlir/Analysis/AffineStructures.h" +#include "mlir/IR/AffineMap.h" + +namespace mlir { +class ValueRange; + +/// Linalg-specific constraints set extensions. +class ConstraintsSet : public FlatAffineConstraints { +public: + ConstraintsSet() : FlatAffineConstraints() {} + + /// Assuming `val` is defined by `val = affine.min map (operands)`, introduce + /// all the constraints `val <= expr_i(operands)`, where expr_i are all the + /// results of `map`. + // This API avoids taking a dependence on the AffineMinOp definition. + LogicalResult composeMin(Value val, AffineMap map, ValueRange operands) { + return composeMinOrMaxMapAndOperands(val, map, operands, /*min=*/true); + } + + /// Assuming `val` is defined by `val = affine.max map (operands)`, introduce + /// all the constraints `val >= expr_i(operands)`, where expr_i are all the + /// results of `map`. + // This API avoids taking a dependence on the AffineMaxOp definition.
+ LogicalResult composeMax(Value val, AffineMap map, ValueRange operands) { + return composeMinOrMaxMapAndOperands(val, map, operands, /*min=*/false); + } + + /// Assuming `val` is defined by `val = affine.apply map (operands)`, call + /// composeMap. + // This API avoids taking a dependence on the AffineApplyOp definition. + LogicalResult composeAffineApply(Value val, AffineMap map, + ValueRange operands); + + /// Asserts the identifier `id` is in the constraints set and returns it. + unsigned lookupPos(Value id) const; + + /// If v is not in the constraint set, insert it as a dim or symbol depending + /// on `asDim`. + /// Return success if v is of dim id type when `asDim` is true and of symbol + /// id type when `asDim` is false. + /// Return failure otherwise. + LogicalResult ensureIdOfType(Value v, bool asDim); + +private: + /// Implementation detail for composeMin/Max. + LogicalResult composeMinOrMaxMapAndOperands(Value val, AffineMap map, + ValueRange operands, bool min); +}; + +} // namespace mlir + +#endif // MLIR_LINALG_ANALYSIS_CONSTRAINTS_SET diff --git a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt @@ -1,12 +1,15 @@ add_mlir_dialect_library(MLIRLinalgAnalysis + ConstraintsSet.cpp DependenceAnalysis.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg LINK_LIBS PUBLIC + MLIRAnalysis MLIRIR MLIRLinalg + MLIRLoopAnalysis MLIRMemRef MLIRStandard ) diff --git a/mlir/lib/Dialect/Linalg/Analysis/ConstraintsSet.cpp b/mlir/lib/Dialect/Linalg/Analysis/ConstraintsSet.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Analysis/ConstraintsSet.cpp @@ -0,0 +1,87 @@ +//===- ConstraintsSet.cpp - Extensions for FlatAffineConstraints ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Linalg-specific constraints set extensions. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Analysis/ConstraintsSet.h" +#include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/IR/AffineMap.h" + +using namespace mlir; + +unsigned ConstraintsSet::lookupPos(Value id) const { + unsigned pos; + if (!findId(id, &pos)) { + llvm::errs() << "Lookup failed: " << id << "\n"; + llvm_unreachable("Lookup failed"); + } + return pos; +} + +LogicalResult ConstraintsSet::ensureIdOfType(Value v, bool asDim) { + if (!containsId(v)) { + if (asDim) + addDimId(getNumDimIds(), v); + else + addSymbolId(getNumSymbolIds(), v); + return success(); + } + unsigned pos = lookupPos(v); + return success((asDim && pos < getNumDimIds()) || + (!asDim && getNumDimIds() <= pos && + pos < getNumDimIds() + getNumSymbolIds())); +} + +LogicalResult ConstraintsSet::composeAffineApply(Value val, AffineMap map, + ValueRange operands) { + AffineValueMap avm(map, operands, val); + return composeMap(&avm); +} + +LogicalResult ConstraintsSet::composeMinOrMaxMapAndOperands(Value val, + AffineMap map, + ValueRange operands, + bool min) { + ConstraintsSet localCst; + std::vector> flatExprs; + if (failed(getFlattenedAffineExprs(map, &flatExprs, &localCst))) + return failure(); + assert(flatExprs.size() == map.getNumResults() && + "incorrect number of flattened expressiosn"); + + // Local vars on a per-need basis. + if (localCst.getNumLocalIds() != 0) + return failure(); + + // Add one inequality for each result connecting `val` to the other ids in + // `operands`. For instance, if the expression is: + // `16 * i0 + i1` and + // `min` is true + // add: + // -d_val + 16 * i0 + i1 >= 0.
+ for (const auto &flatExpr : flatExprs) { + assert(flatExpr.size() >= operands.size() + 1); + SmallVector ineq(getNumCols(), 0); + for (unsigned i = 0, e = operands.size(); i < e; i++) + ineq[lookupPos(operands[i])] = min ? flatExpr[i] : -flatExpr[i]; + + // Set the coefficient for `d_val`. + ineq[lookupPos(val)] = min ? -1 : 1; + + // Set the constant term (upper bound in flatExpr is exclusive). + ineq[getNumCols() - 1] = min ? flatExpr[flatExpr.size() - 1] - 1 + : -flatExpr[flatExpr.size() - 1]; + + // Add the inequality connecting the result of the map to the rest. + addInequality(ineq); + } + + return success(); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Linalg/Transforms/Hoisting.h" +#include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/Utils.h" +#include "mlir/Dialect/Linalg/Analysis/ConstraintsSet.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/SCF/SCF.h" @@ -530,97 +532,6 @@ return outer.isDefinedOutsideOfLoop(v) || v.getDefiningOp(); } -/// Compute the tightest lower bound with quantities that are all defined -/// outside of `outer`. -/// Return null if such a bound cannot be computed. -Value computeLoopIndependentLowerBound(OpBuilder &b, scf::ForOp outer, - Value v) { - if (isDefinedOutsideOrConstant(outer, v)) - return v; - return Value(); -} - -/// Compute the tightest upper bound with quantities that are all defined -/// outside of `outer`. -/// Expects all ops in the backward slice of `v` up to `outer` to be either -/// scf.for, affine.min or affine.apply. 
-static Value computeLoopIndependentUpperBound(OpBuilder &b, scf::ForOp outer, - Value v) { - if (isDefinedOutsideOrConstant(outer, v)) - return v; - - LLVM_DEBUG(DBGS() << "Begin loopIndependentUpperBound for: " << v << "\n"); - - bool ok = - backwardsSliceOnlyHasOpsOfType( - outer, v); - assert(ok && "expected to only be defined by scf::ForOp and AffineMinOp"); - (void)ok; - - // Compute a backward slice up to, but not including, `outer`. - SetVector backwardSlice; - getBackwardSlice(v, &backwardSlice, - [&](Operation *op) { return outer->isProperAncestor(op); }); - backwardSlice.insert(v.getDefiningOp()); - - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(outer); - Value res = v; - BlockAndValueMapping bvm; - for (Operation *op : backwardSlice) { - if (isa(op)) - continue; - if (isa(op)) { - b.clone(*op, bvm); - continue; - } - auto sliceMinOp = cast(op); - GetMinMaxExprFn getSCFMinMax = [&](Value value, - SmallVectorImpl &dims, - SmallVectorImpl &symbols) { - return getSCFMinMaxExpr(value, dims, symbols, [&](Operation *op) { - return outer->isAncestor(op); - }); - }; - // Perform the substitution of the operands of AffineMinOp. - auto mapAndOperands = substituteMin(sliceMinOp, getSCFMinMax); - SmallVector resultOperands = mapAndOperands.dims; - llvm::append_range(resultOperands, mapAndOperands.symbols); - AffineMap map = mapAndOperands.map; - canonicalizeMapAndOperands(&map, &resultOperands); - map = simplifyAffineMap(map); - res = b.create( - outer->getLoc(), map, - llvm::to_vector<4>(llvm::map_range(resultOperands, [&](Value operand) { - return bvm.lookupOrDefault(operand); - }))); - bvm.map(sliceMinOp, res); - } - LLVM_DEBUG(DBGS() << "End loopIndependentUpperBound with: " << res << "\n"); - return res; -} - -/// Return the number of iterations in the loop (ub - lb).ceilDiv(step). -/// The returned Value is guaranteed not to depend on any loop comprised in -/// [`outer`, `forOp`]. 
-/// Return null if such a loop-independent quantity cannot be computed. -static Value buildLoopTripCount(OpBuilder &b, scf::ForOp outer, - scf::ForOp forOp) { - MLIRContext *ctx = forOp->getContext(); - AffineExpr lb, ub, step; - bindDims(ctx, lb, ub); - bindSymbols(ctx, step); - Value lbVal = computeLoopIndependentLowerBound(b, outer, forOp.lowerBound()), - ubVal = computeLoopIndependentUpperBound(b, outer, forOp.upperBound()), - stepVal = forOp.step(); - if (!lbVal || !ubVal || !stepVal) - return Value(); - auto loc = forOp->getLoc(); - Value res = b.create(loc, (ub - lb).ceilDiv(step), - ValueRange{lbVal, ubVal, stepVal}); - return res; -} - /// Return the current iteration number in the loop (iv - lb).ceilDiv(step). /// The returned Value is guaranteed not to depend on any loop comprised in /// [`outer`, `forOp`]. @@ -631,14 +542,135 @@ AffineExpr iv, lb, step; bindDims(ctx, iv, lb); bindSymbols(ctx, step); - Value ivVal = forOp.getInductionVar(), - lbVal = computeLoopIndependentLowerBound(b, outer, forOp.lowerBound()), - stepVal = forOp.step(); - if (!ivVal || !lbVal || !stepVal) + if (!isDefinedOutsideOrConstant(outer, forOp.lowerBound()) || + !isDefinedOutsideOrConstant(outer, forOp.step())) return Value(); + Value ivVal = forOp.getInductionVar(), lbVal = forOp.lowerBound(), + stepVal = forOp.step(); auto loc = forOp->getLoc(); - return b.create(loc, (iv - lb).ceilDiv(step), - ValueRange{ivVal, lbVal, stepVal}); + return b.createOrFold(loc, (iv - lb).ceilDiv(step), + ValueRange{ivVal, lbVal, stepVal}); +} + +/// Given a set of loops, assumed to be scf::ForOp, create a constraint set +/// containing the inequalities `iv - lb >= 0` and `-iv + ub - 1 >= 0` for each +/// loop.
+static ConstraintsSet initLoopIvsAndBounds(ArrayRef loops) { + ConstraintsSet constraints; + for (Operation *op : loops) + constraints.addDimId(constraints.getNumDimIds(), + cast(op).getInductionVar()); + for (Operation *op : loops) + constraints.addDimId(constraints.getNumDimIds(), + cast(op).lowerBound()); + for (Operation *op : loops) + constraints.addDimId(constraints.getNumDimIds(), + cast(op).upperBound()); + unsigned numLoops = loops.size(); + for (unsigned ivIdx = 0, e = numLoops; ivIdx < e; ++ivIdx) { + // iv - lb >= 0 + SmallVector ineqLb(constraints.getNumCols(), 0); + ineqLb[ivIdx] = 1; + ineqLb[ivIdx + numLoops] = -1; + // -iv + ub - 1 >= 0 + SmallVector ineqUb(constraints.getNumCols(), 0); + ineqUb[ivIdx] = -1; + ineqUb[ivIdx + 2 * numLoops] = 1; + ineqUb[constraints.getNumCols() - 1] = -1; + constraints.addInequality(ineqLb); + constraints.addInequality(ineqUb); + } + return constraints; +} + +/// For each loop in `loops`, determine the ops involved in the construction of +/// its upper bound---up to the outerLimit loop---and fold them as new +/// inequalities in the constraint set. +/// This is achieved by computing the backwardSlice of the loop's upper bound +/// and iteratively folding each op in reverse topological order to guarantee +/// use-def ordering. +/// As operations are folded in, their result is projected out of the +/// constraints set. +/// The following operations are supported: +/// - scf::ForOp are simply skipped. +/// - AffineApplyOp are composed to replace the result by an equality. +/// - AffineMinOp are composed by adding each entry as an upper bound. +/// If any other operation is met, return failure. +// TODO: extend on a per-need basis.
+static LogicalResult +foldUpperBoundsIntoConstraintsSet(ConstraintsSet &constraints, + scf::ForOp outerLimit, + ArrayRef loops) { + SetVector toProjectOut; + for (Operation *loop : loops) { + auto ub = cast(loop).upperBound(); + if (isDefinedOutsideOrConstant(outerLimit, ub)) + continue; + + // Compute a backward slice up to, but not including, `outerLimit`. + SetVector backwardSlice; + getBackwardSlice(ub, &backwardSlice, [&](Operation *op) { + return outerLimit->isProperAncestor(op); + }); + backwardSlice.insert(ub.getDefiningOp()); + + // Iterate over all ops in the slice and compose them in the constraints. + for (Operation *op : llvm::reverse(backwardSlice)) { + if (!isa(op)) + return failure(); + if (isa(op)) + continue; + // Return true if `v` cannot be made a dim id of `constraints`. + auto ensureIdFailed = [&](Value v) { + return failed(constraints.ensureIdOfType(v, /*asDim=*/true)); + }; + + // Ensure all ids exist and add results for later projection. + if (llvm::any_of(op->getResults(), ensureIdFailed) || + llvm::any_of(op->getOperands(), ensureIdFailed)) + return failure(); + + // All supported ops have 1 result. + // TODO: extend when needed. + toProjectOut.insert(op->getResult(0)); + + // Compose supported ops. + if (auto affineApplyOp = dyn_cast(op)) { + if (failed(constraints.composeAffineApply(affineApplyOp.getResult(), + affineApplyOp.getAffineMap(), + affineApplyOp.getOperands()))) + return failure(); + continue; + } + auto affineMinOp = cast(op); + if (failed(constraints.composeMin(affineMinOp.getResult(), + affineMinOp.getAffineMap(), + affineMinOp.operands()))) + return failure(); + } + } + for (Value v : toProjectOut) + constraints.projectOut(v); + return success(); +} + +/// Compute dynamic tensor sizes, independent of any value defined inside +/// `outer` and such that every n-D iteration of the packingLoops has its own +/// space (so that each packed buffer has a storage location). This is achieved +/// by computing the extent for each of the packing loops.
+static LogicalResult computeBounds(scf::ForOp outer, + ArrayRef packingLoops, + SmallVector &lbs, + SmallVector &ubs) { + // Packing loop IVs are introduced as the first positions. + ConstraintsSet constraints = initLoopIvsAndBounds(packingLoops); + if (failed( + foldUpperBoundsIntoConstraintsSet(constraints, outer, packingLoops))) + return failure(); + // Compute the bounds of the first positions, assuming the others are fixed. + constraints.getSliceBounds(/*pos=*/0, /*num=*/packingLoops.size(), + outer->getContext(), &lbs, &ubs); + return success(); } /// Ensure prerequisites that guarantee pad op hoisting can occur. @@ -725,28 +757,49 @@ assert(outermostEnclosingForOp == backwardSlice.front()); scf::ForOp outer = cast(outermostEnclosingForOp); - if (llvm::any_of(packingLoops, [&](Operation *op) { - scf::ForOp forOp = cast(op); - Value lb = forOp.lowerBound(), ub = forOp.upperBound(), - step = forOp.step(); - return !isDefinedOutsideOrConstant(outer, lb) || - !(isDefinedOutsideOrConstant(outer, ub) || - backwardsSliceOnlyHasOpsOfType(outer, ub)) || - !isDefinedOutsideOrConstant(outer, step); - })) + + ConstraintsSet constraints = initLoopIvsAndBounds(packingLoops.getArrayRef()); + if (failed(foldUpperBoundsIntoConstraintsSet(constraints, outer, + packingLoops.getArrayRef()))) + return failure(); + + unsigned numLoops = packingLoops.size(); + SmallVector lbs(numLoops), ubs(numLoops); + if (failed(computeBounds(outer, packingLoops.getArrayRef(), lbs, ubs))) return failure(); + SmallVector allValues; + constraints.getAllIdValues(&allValues); + SmallVector allNonLoopValues(allValues.begin() + numLoops, + allValues.end()); + + // For each packingLoop, create the extent by (ub - lb).ceilDiv(step). // IP just before the outermost loop considered that we hoist above. 
- OpBuilder b(outermostEnclosingForOp); - dynamicTensorSizes = - llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *op) { - return buildLoopTripCount(b, cast(outermostEnclosingForOp), - cast(op)); - })); - // Assert all loop trip counts can be computed. - if (!llvm::all_of(dynamicTensorSizes, [](Value v) { return v; })) - llvm_unreachable("loop independence prerequisite not met"); + ImplicitLocOpBuilder b(outer->getLoc(), outer); + assert(packingLoops.size() == lbs.size() && "expected matching lb sizes"); + assert(packingLoops.size() == ubs.size() && "expected matching ub sizes"); + for (auto it : llvm::zip(packingLoops, lbs, ubs)) { + scf::ForOp loop = cast(std::get<0>(it)); + AffineMap lbMap = std::get<1>(it); + AffineMap ubMap = std::get<2>(it); + SmallVector lbOperands(allNonLoopValues); + canonicalizeMapAndOperands(&lbMap, &lbOperands); + Value lbVal = b.createOrFold(lbMap, lbOperands); + + SmallVector ubOperands(allNonLoopValues); + canonicalizeMapAndOperands(&ubMap, &ubOperands); + Value ubVal = b.createOrFold(ubMap, ubOperands); + + AffineExpr lb, ub, step; + bindDims(b.getContext(), lb, ub); + bindSymbols(b.getContext(), step); + Value res = b.createOrFold( + (ub - lb).ceilDiv(step), + ValueRange{lbVal, ubVal, cast(loop).step()}); + + dynamicTensorSizes.push_back(res); + } + return success(); } diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir --- a/mlir/test/Dialect/Linalg/hoist-padding.mlir +++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir @@ -141,8 +141,10 @@ // ----- + // CHECK-DAG: #[[$MIN_REST8:[0-9a-z]+]] = affine_map<(d0)[s0] -> (8, -d0 + s0)> -// CHECK-DAG: #[[$MIN_MOD4:[0-9a-z]+]] = affine_map<(d0) -> (4, d0 - ((d0 - 1) floordiv 4) * 4)> +// CHECK-DAG: #[[$MIN_REST4:[0-9a-z]+]] = affine_map<(d0, d1) -> (4, d0 - d1)> +// CHECK-DAG: #[[$MIN_REST2:[0-9a-z]+]] = affine_map<(d0, d1) -> (2, d0 - d1)> // CHECK-DAG: #[[$DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)> // CHECK-DAG: 
#[[$DIV2:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 2)> #map0 = affine_map<(d0)[s0] -> (8, -d0 + s0)> @@ -167,20 +169,18 @@ // // CHECK: %[[MR8:.*]] = affine.min #[[$MIN_REST8]](%[[I]]) // CHECK: %[[D0:.*]] = affine.apply #[[$DIV4]](%[[MR8]]) - // CHECK: %[[MM4:.*]] = affine.min #[[$MIN_MOD4]](%[[MR8]]) - // CHECK: %[[D1:.*]] = affine.apply #[[$DIV2]](%[[MM4]]) // Init tensor and pack. - // CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], %[[D1]], 2] : tensor - // CHECK: %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_A]]) -> (tensor) { + // CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], 2, 2] : tensor + // CHECK: %[[CAST_INIT_PACKED_A:.*]] = tensor.cast %[[INIT_PACKED_A]] : tensor to tensor + // CHECK: %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[CAST_INIT_PACKED_A]]) -> (tensor) { // CHECK: scf.for %[[III:[0-9a-z]+]] = // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor // // CHECK: %[[D0_2:.*]] = affine.apply #[[$DIV4]](%[[MR8]]) - // CHECK: %[[MM4_2:.*]] = affine.min #[[$MIN_MOD4]](%[[MR8]]) - // CHECK: %[[D1_2:.*]] = affine.apply #[[$DIV2]](%[[MM4_2]]) // Init tensor and pack. - // CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], %[[D1_2]], 2] : tensor - // CHECK: %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_B]]) -> (tensor) { + // CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], 2, 2] : tensor + // CHECK: %[[CAST_INIT_PACKED_B:.*]] = tensor.cast %[[INIT_PACKED_B]] : tensor to tensor + // CHECK: %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[CAST_INIT_PACKED_B]]) -> (tensor) { // CHECK: scf.for %[[III_2:[0-9a-z]+]] = // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor // Compute.