Diff 322376

mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td

Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	def Shape_AddOp : Shape_Op<"add", [Commutative, NoSideEffect]> {
let assemblyFormat = [{		let assemblyFormat = [{
$lhs `,` $rhs attr-dict `:` type($lhs) `,` type($rhs) `->` type($result)		$lhs `,` $rhs attr-dict `:` type($lhs) `,` type($rhs) `->` type($result)
}];		}];

let verifier = [{ return verifySizeOrIndexOp(*this); }];		let verifier = [{ return verifySizeOrIndexOp(*this); }];
}		}

def Shape_BroadcastOp : Shape_Op<"broadcast", [Commutative]> {		def Shape_BroadcastOp : Shape_Op<"broadcast", [Commutative]> {
let summary = "Returns the broadcasted output shape of two inputs";		let summary = "Returns the broadcasted output shape of two or more inputs";
let description = [{		let description = [{
Returns the broadcasted shape for two input shapes or extent tensors. Both		Returns the broadcasted shape for input shapes or extent tensors. The rest
operands can be of type `shape.shape` or `tensor<?xindex>`. The result is of		of this description is simplified for the 2 input case but can be extended
type `shape.shape` and, if both operands are tensors, may be of type		to more inputs. Both operands can be of type `shape.shape` or
`tensor<?xindex>`.		`tensor<?xindex>`. The result is of type `shape.shape` and, if both
		operands are tensors, may be of type `tensor<?xindex>`.

If the two operand shapes are of different rank the smaller one is padded		If the two operand shapes are of different rank the smaller one is padded
with 1's from the left. The resulting broadcasted shape is then defined as		with 1's from the left. The resulting broadcasted shape is then defined as

result[i] = lhs[i] if lhs[i] == rhs[i]		result[i] = lhs[i] if lhs[i] == rhs[i]
= lhs[i] if rhs[i] == 1		= lhs[i] if rhs[i] == 1
= rhs[i] if lhs[i] == 1.		= rhs[i] if lhs[i] == 1.

In case the resulting shape is undefined, i.e. if corresponding extents are		In case the resulting shape is undefined, i.e. if corresponding extents are
different from each other but none is 1, the result is an error shape.		different from each other but none is 1, the result is an error shape.
Likewise error values are propagated if any of the operands holds an error		Likewise error values are propagated if any of the operands holds an error
value. If the result type is an extent tensor (and can therefore not hold		value. If the result type is an extent tensor (and can therefore not hold
the error value) the behavior may be undefined. The optional string		the error value) the behavior may be undefined. The optional string
attribute can be used to describe the error case.		attribute can be used to describe the error case.
}];		}];

let arguments = (ins Shape_ShapeOrExtentTensorType:$lhs,		let arguments = (ins Variadic<Shape_ShapeOrExtentTensorType>:$shapes,
Shape_ShapeOrExtentTensorType:$rhs,
OptionalAttr<StrAttr>:$error);		OptionalAttr<StrAttr>:$error);
let results = (outs Shape_ShapeOrExtentTensorType:$result);		let results = (outs Shape_ShapeOrExtentTensorType:$result);

let assemblyFormat = [{		let assemblyFormat = [{
$lhs `,` $rhs attr-dict `:` type($lhs) `,` type($rhs) `->` type($result)		$shapes attr-dict `:` type($shapes) `->` type($result)
}];		}];

let verifier = [{ return ::verifyShapeOrExtentTensorOp(*this); }];		let builders = [OpBuilderDAG<(ins "::mlir::Type":$result,
let hasFolder = 1;		"::mlir::Value":$lhs, "::mlir::Value":$rhs,
		"/optional/ ::mlir::StringAttr":$error), [{
		build($_builder, $_state, result, ::llvm::makeArrayRef({lhs, rhs}), error);
		}]>
		];
		jpienaarUnsubmitted Done Reply Inline Actions Do we have places that use this form? jpienaar: Do we have places that use this form?
		tpoppAuthorUnsubmitted Done Reply Inline Actions This was the original form of the build method before making the number of inputs variadic. I thought it might be nice to still have it for the common case and to make the transition essentially an NFC tpopp: This was the original form of the build method before making the number of inputs variadic. I…

let verifier = [{ return ::verifyShapeOrExtentTensorOp(*this); }];		let hasFolder = 1;
		let verifier = [{
		return success(succeeded(::verifyShapeOrExtentTensorOp(*this)) &&
		getNumOperands() >= 2);
		}];
}		}

def Shape_ConstShapeOp : Shape_Op<"const_shape", [ConstantLike, NoSideEffect]> {		def Shape_ConstShapeOp : Shape_Op<"const_shape", [ConstantLike, NoSideEffect]> {
let summary = "Creates a constant shape or extent tensor";		let summary = "Creates a constant shape or extent tensor";
let description = [{		let description = [{
Creates a constant shape or extent tensor. The individual extents are given		Creates a constant shape or extent tensor. The individual extents are given
as the `shape` attribute. The number of these values equals the shape's		as the `shape` attribute. The number of these values equals the shape's
rank.		rank.
▲ Show 20 Lines • Show All 750 Lines • Show Last 20 Lines

mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp

	//===- ShapeToStandard.cpp - conversion from Shape to Standard dialect ----===//			//===- ShapeToStandard.cpp - conversion from Shape to Standard dialect ----===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h"			#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h"

	#include "../PassDetail.h"			#include "../PassDetail.h"
	#include "mlir/Dialect/SCF/SCF.h"			#include "mlir/Dialect/SCF/SCF.h"
	#include "mlir/Dialect/Shape/IR/Shape.h"			#include "mlir/Dialect/Shape/IR/Shape.h"
	#include "mlir/Dialect/StandardOps/IR/Ops.h"			#include "mlir/Dialect/StandardOps/IR/Ops.h"
	#include "mlir/Dialect/Tensor/IR/Tensor.h"			#include "mlir/Dialect/Tensor/IR/Tensor.h"
	#include "mlir/IR/BlockAndValueMapping.h"			#include "mlir/IR/BlockAndValueMapping.h"
				#include "mlir/IR/ImplicitLocOpBuilder.h"
	#include "mlir/Transforms/DialectConversion.h"			#include "mlir/Transforms/DialectConversion.h"
				#include "llvm/ADT/STLExtras.h"

	using namespace mlir;			using namespace mlir;
	using namespace mlir::shape;			using namespace mlir::shape;
	using namespace mlir::scf;			using namespace mlir::scf;

	/// Conversion patterns.			/// Conversion patterns.
	namespace {			namespace {
	class AnyOpConversion : public OpConversionPattern<AnyOp> {			class AnyOpConversion : public OpConversionPattern<AnyOp> {
	▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
	namespace {			namespace {
	struct BroadcastOpConverter : public OpConversionPattern<BroadcastOp> {			struct BroadcastOpConverter : public OpConversionPattern<BroadcastOp> {
	using OpConversionPattern<BroadcastOp>::OpConversionPattern;			using OpConversionPattern<BroadcastOp>::OpConversionPattern;

	LogicalResult			LogicalResult
	matchAndRewrite(BroadcastOp op, ArrayRef<Value> operands,			matchAndRewrite(BroadcastOp op, ArrayRef<Value> operands,
	ConversionPatternRewriter &rewriter) const override;			ConversionPatternRewriter &rewriter) const override;
	};			};

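				// Computes the broadcasted extent for the output dimension
				// `outputDimension` across any number of extent tensors. Each extent
				// tensor is right-aligned against the output: `rankDiffs[i]` is the number
				// of leading output dimensions that operand `i` does not cover, so its
				// extent for this dimension is read at `outputDimension - rankDiffs[i]`.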
				jpienaarUnsubmitted Done Reply Inline Actions The last sentence is true for all rewrites, so redundant here. jpienaar: The last sentence is true for all rewrites, so redundant here.
				Value getBroadcastedDim(ImplicitLocOpBuilder lb, ValueRange extentTensors,
				ValueRange rankDiffs, Value outputDimension) {
				Value one = lb.create<ConstantIndexOp>(1);
				Value broadcastedDim = one;
				for (auto tup : llvm::zip(extentTensors, rankDiffs)) {
				Value shape = std::get<0>(tup);
				Value rankDiff = std::get<1>(tup);
				Value outOfBounds =
				lb.create<CmpIOp>(CmpIPredicate::ult, outputDimension, rankDiff);
				Type indexTy = lb.getIndexType();
				jpienaarUnsubmitted Done Reply Inline Actions A comment here would be good to explain why dim is used to get rank. Also does dim work on a shape? (Tensor yes, and so are we guaranteed we'd be in tensor world here?) jpienaar: A comment here would be good to explain why dim is used to get rank. Also does dim work on a…
				tpoppAuthorUnsubmitted Done Reply Inline Actions This was guaranteed in the caller. I'll replicate the check here though in case of future uses from other locations. tpopp: This was guaranteed in the caller. I'll replicate the check here though in case of future uses…
				broadcastedDim =
				lb.create<IfOp>(
				TypeRange{indexTy}, outOfBounds,
				[&](OpBuilder &b, Location loc) {
				b.create<scf::YieldOp>(loc, broadcastedDim);
				},
				[&](OpBuilder &b, Location loc) {
				// The broadcasting logic is:
				// - if this operand's extent is equal to 1, keep the
				// extent accumulated from the previous operands
				// (initially the constant 1)
				// - otherwise, take this operand's extent as-is.
				// Note that this logic remains correct in the presence
				// of dimensions of zero extent.
				Value lesserRankOperandDimension =
				b.create<SubIOp>(loc, indexTy, outputDimension, rankDiff);
				Value lesserRankOperandExtent = b.create<tensor::ExtractOp>(
				loc, shape, ValueRange{lesserRankOperandDimension});

				Value dimIsOne = b.create<CmpIOp>(loc, CmpIPredicate::eq,
				lesserRankOperandExtent, one);
				Value dim = b.create<SelectOp>(loc, dimIsOne, broadcastedDim,
				lesserRankOperandExtent);
				b.create<scf::YieldOp>(loc, dim);
				})
				herhutUnsubmitted Done Reply Inline Actions `inBound` is confusing. It is true if we are outside the bounds of the index, right? herhut: `inBound` is confusing. It is true if we are outside the bounds of the index, right?
				.getResult(0);
				}
				return broadcastedDim;
				}
	} // namespace			} // namespace

	LogicalResult BroadcastOpConverter::matchAndRewrite(			LogicalResult BroadcastOpConverter::matchAndRewrite(
	BroadcastOp op, ArrayRef<Value> operands,			BroadcastOp op, ArrayRef<Value> operands,
	ConversionPatternRewriter &rewriter) const {			ConversionPatternRewriter &rewriter) const {
	// For now, this lowering is only defined on `tensor<?xindex>` operands, not			// For now, this lowering is only defined on `tensor<?xindex>` operands, not
	// on shapes.			// on shapes.
	if (op.getType().isa<ShapeType>())			if (op.getType().isa<ShapeType>())
	return failure();			return failure();

	assert(!op.lhs().getType().isa<ShapeType>() &&
	!op.rhs().getType().isa<ShapeType>());
	auto loc = op.getLoc();			auto loc = op.getLoc();
				herhutUnsubmitted Done Reply Inline Actions This sentence ends abruptly. herhut: This sentence ends abruptly.
				ImplicitLocOpBuilder lb(loc, rewriter);
	BroadcastOp::Adaptor transformed(operands);			BroadcastOp::Adaptor transformed(operands);
	Value zero = rewriter.create<ConstantIndexOp>(loc, 0);
	Value one = rewriter.create<ConstantIndexOp>(loc, 1);

	// Find smaller and greater rank and extent tensor.			Value zero = lb.create<ConstantIndexOp>(0);
	Value lhsRank = rewriter.create<DimOp>(loc, op.lhs(), zero);			Type indexTy = lb.getIndexType();
	Value rhsRank = rewriter.create<DimOp>(loc, op.rhs(), zero);
	Value lhsRankULE =
	rewriter.create<CmpIOp>(loc, CmpIPredicate::ule, lhsRank, rhsRank);
	Type indexTy = rewriter.getIndexType();
	Value lesserRank =
	rewriter.create<SelectOp>(loc, lhsRankULE, lhsRank, rhsRank);
	Value greaterRank =
	rewriter.create<SelectOp>(loc, lhsRankULE, rhsRank, lhsRank);
	auto erasedRankType =
	RankedTensorType::get({ShapedType::kDynamicSize}, indexTy);
	Value rankErasedLhs =
	rewriter.create<tensor::CastOp>(loc, erasedRankType, transformed.lhs());
	Value rankErasedRhs =
	rewriter.create<tensor::CastOp>(loc, erasedRankType, transformed.rhs());
	Value lesserRankOperand =
	rewriter.create<SelectOp>(loc, lhsRankULE, rankErasedLhs, rankErasedRhs);
	Value greaterRankOperand =
	rewriter.create<SelectOp>(loc, lhsRankULE, rankErasedRhs, rankErasedLhs);

	Value rankDiff =			// Save all the ranks for bounds checking. Because this is a tensor
	rewriter.create<SubIOp>(loc, indexTy, greaterRank, lesserRank);			// representing the shape extents, the rank is the extent of the only
	rewriter.replaceOpWithNewOp<tensor::GenerateOp>(			// dimension in the tensor.
	op, getExtentTensorType(op.getContext()), ValueRange{greaterRank},			SmallVector<Value> ranks, rankDiffs;
				llvm::append_range(ranks, llvm::map_range(transformed.shapes(), [&](Value v) {
				return lb.create<DimOp>(v, zero);
				herhutUnsubmitted Done Reply Inline Actions You could fold this up. In the then case, return `reduceDim`. In the else case, do the select. herhut: You could fold this up. In the then case, return `reduceDim`. In the else case, do the select.
				}));

				// Find the maximum rank
				Value maxRank = ranks.front();
				for (Value v : llvm::drop_begin(ranks, 1)) {
				Value rankIsGreater = lb.create<CmpIOp>(CmpIPredicate::ugt, v, maxRank);
				maxRank = lb.create<SelectOp>(rankIsGreater, v, maxRank);
				}

				herhutUnsubmitted Done Reply Inline Actions This comment seems lonely. herhut: This comment seems lonely.
				tpoppAuthorUnsubmitted Done Reply Inline Actions Fixed tpopp: Fixed
				// Calculate the difference between the maximum rank and each operand's
				// rank; these differences are used later as offsets into the operands.
				llvm::append_range(rankDiffs, llvm::map_range(ranks, [&](Value v) {
				return lb.create<SubIOp>(indexTy, maxRank, v);
				}));

				rewriter.replaceOp(
				op, lb.create<tensor::GenerateOp>(
				getExtentTensorType(lb.getContext()), ValueRange{maxRank},
	[&](OpBuilder &b, Location loc, ValueRange args) {			[&](OpBuilder &b, Location loc, ValueRange args) {
	Value outputDimension = args[0];			Value broadcastedDim = getBroadcastedDim(
	Value isUnchallengedDimension = b.create<CmpIOp>(			ImplicitLocOpBuilder(loc, b), transformed.shapes(),
	loc, CmpIPredicate::ult, outputDimension, rankDiff);			rankDiffs, args[0]);
	Value greaterRankOperandExtent = b.create<tensor::ExtractOp>(
	loc, greaterRankOperand, outputDimension);			b.create<tensor::YieldOp>(loc, broadcastedDim);
	// The initial dimensions of the greater-rank operand are unchallenged,			})
	// so we can take them as-is. Otherwise, we need to do a comparison.			->getResults());
				herhutUnsubmitted Not Done Reply Inline Actions Does it still make sense to have this in an extra function? herhut: Does it still make sense to have this in an extra function?
				tpoppAuthorUnsubmitted Done Reply Inline Actions I refactored to have only part of this function separate as it's convenient for use in the broadcastable follow up tpopp: I refactored to have only part of this function separate as it's convenient for use in the…
	// We need an actual branch here (instead of a select) because the
	// lesser-rank operand might be rank 0, so any tensor.extract would be
	// invalid.
	auto ifOp = b.create<IfOp>(
	loc, TypeRange{indexTy}, isUnchallengedDimension,
	[&](OpBuilder &b, Location loc) {
	b.create<scf::YieldOp>(loc, greaterRankOperandExtent);
	},
	[&](OpBuilder &b, Location loc) {
	// The broadcasting logic is:
	// - if one extent (here we arbitrarily choose the extent from
	// the greater-rank operand) is equal to 1, then take the extent
	// from the other operand
	// - otherwise, take the extent as-is.
	// Note that this logic remains correct in the presence of
	// dimensions of zero extent.
	Value lesserRankOperandDimension =
	b.create<SubIOp>(loc, indexTy, outputDimension, rankDiff);
	Value lesserRankOperandExtent = b.create<tensor::ExtractOp>(
	loc, lesserRankOperand,
	ValueRange{lesserRankOperandDimension});
	Value greaterRankOperandExtentIsOne = b.create<CmpIOp>(
	loc, CmpIPredicate::eq, greaterRankOperandExtent, one);
	Value broadcastedExtent = b.create<SelectOp>(
	loc, greaterRankOperandExtentIsOne, lesserRankOperandExtent,
	greaterRankOperandExtent);
	b.create<scf::YieldOp>(loc, broadcastedExtent);
	});
	b.create<tensor::YieldOp>(loc, ifOp.getResult(0));
	});
	return success();			return success();
	}			}

	namespace {			namespace {
	class ConstShapeOpConverter : public OpConversionPattern<ConstShapeOp> {			class ConstShapeOpConverter : public OpConversionPattern<ConstShapeOp> {
	public:			public:
	using OpConversionPattern<ConstShapeOp>::OpConversionPattern;			using OpConversionPattern<ConstShapeOp>::OpConversionPattern;

	LogicalResult			LogicalResult
	matchAndRewrite(ConstShapeOp op, ArrayRef<Value> operands,			matchAndRewrite(ConstShapeOp op, ArrayRef<Value> operands,
	ConversionPatternRewriter &rewriter) const override;			ConversionPatternRewriter &rewriter) const override;
	};			};
	} // namespace			} // namespace

	LogicalResult ConstShapeOpConverter::matchAndRewrite(			LogicalResult ConstShapeOpConverter::matchAndRewrite(
	ConstShapeOp op, ArrayRef<Value> operands,			ConstShapeOp op, ArrayRef<Value> operands,
	ConversionPatternRewriter &rewriter) const {			ConversionPatternRewriter &rewriter) const {

	// For now, this lowering supports only extent tensors, not `shape.shape`			// For now, this lowering supports only extent tensors, not `shape.shape`
	// types.			// types.
	if (op.getType().isa<ShapeType>())			if (op.getType().isa<ShapeType>())
				jpienaarUnsubmitted Done Reply Inline Actions Could more be reused here? E.g., is the nary one (excluding computing the max rank) less efficient for the binary case? Or vice versa, is multiple binary case applications less efficient than nary lowering? jpienaar: Could more be reused here? E.g., is the nary one (excluding computing the max rank) less…
				herhutUnsubmitted Done Reply Inline Actions Thinking of it, maybe doing multiple 2d broadcasts in a row (in the implementation, not the op) would yield similar performance. herhut: Thinking of it, maybe doing multiple 2d broadcasts in a row (in the implementation, not the op)…
				tpoppAuthorUnsubmitted Done Reply Inline Actions I think performance differences between the two are negligible. We already fully unroll the `tensor::GenerateOp`, so they will be roughly the same. For the binary case, we need a starting value during reductions in the nary lowering (without making the c++ code much more complex) while the binary case can skip that step. On the other hand, the binary case might recompute a small amount of work between each invocation for more than 2 inputs. Technically the nary case has the potential to be just as performant. I'm just not sure how clean I can make it look. I personally find the n-ary lowering to be easier to read, and I think Stephan has in the past said he expected the binary case was easier to read. I think we should choose the implementation that we find easier to read and stick with that. I would like to hear your opinions on if you agree or not, and which you find easier to read. tpopp: I think performance differences between the two are negligible. We already fully unroll the…
				herhutUnsubmitted Done Reply Inline Actions I found the binary case easier because my n-ary case looked like a mess. Looking at your code, this is much nicer. So let's ship the n-ary case only. The performance difference should be negligible. Could you also extend the `cstr_broadcastable` accordingly? They need to be in sync otherwise `broadcast` cannot really be used. herhut: I found the binary case easier because my n-ary case looked like a mess. Looking at your code…
				tpoppAuthorUnsubmitted Done Reply Inline Actions Binary is removed. I'll extend `cstr_broadcastable` in a follow up CL. tpopp: Binary is removed. I'll extend `cstr_broadcastable` in a follow up CL.
	return failure();			return failure();

	auto loc = op.getLoc();			auto loc = op.getLoc();
	SmallVector<Value, 4> extentOperands;			SmallVector<Value, 4> extentOperands;
	for (auto extent : op.shape()) {			for (auto extent : op.shape()) {
	extentOperands.push_back(			extentOperands.push_back(
	rewriter.create<ConstantIndexOp>(loc, extent.getLimitedValue()));			rewriter.create<ConstantIndexOp>(loc, extent.getLimitedValue()));
	}			}
	▲ Show 20 Lines • Show All 443 Lines • Show Last 20 Lines

mlir/lib/Dialect/Shape/IR/Shape.cpp

Show First 20 Lines • Show All 346 Lines • ▼ Show 20 Lines	if (op.getNumOperands() == 0)
return op.emitOpError("no operands specified");		return op.emitOpError("no operands specified");

return success();		return success();
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// BroadcastOp		// BroadcastOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

		herhutUnsubmitted Done Reply Inline Actions Is this intended? herhut: Is this intended?
OpFoldResult BroadcastOp::fold(ArrayRef<Attribute> operands) {		OpFoldResult BroadcastOp::fold(ArrayRef<Attribute> operands) {
if (!operands[1])		if (!operands[1])
return nullptr;		return nullptr;

		// TODO: Support folding with more than 2 input shapes
		if (operands.size() > 2 && !operands[2].isa<StringAttr>())
		return nullptr;

auto rhsShape = llvm::to_vector<6>(		auto rhsShape = llvm::to_vector<6>(
operands[1].cast<DenseIntElementsAttr>().getValues<int64_t>());		operands[1].cast<DenseIntElementsAttr>().getValues<int64_t>());
if (rhsShape.empty())		if (rhsShape.empty())
return lhs();		return shapes()[0];

if (!operands[0])		if (!operands[0])
return nullptr;		return nullptr;

auto lhsShape = llvm::to_vector<6>(		auto lhsShape = llvm::to_vector<6>(
operands[0].cast<DenseIntElementsAttr>().getValues<int64_t>());		operands[0].cast<DenseIntElementsAttr>().getValues<int64_t>());
if (lhsShape.empty())		if (lhsShape.empty())
return rhs();		return shapes()[1];

SmallVector<int64_t, 6> resultShape;		SmallVector<int64_t, 6> resultShape;
// If the shapes are not compatible, we can't fold it.		// If the shapes are not compatible, we can't fold it.
// TODO: Fold to an "error".		// TODO: Fold to an "error".
if (!OpTrait::util::getBroadcastedShape(lhsShape, rhsShape, resultShape))		if (!OpTrait::util::getBroadcastedShape(lhsShape, rhsShape, resultShape))
return nullptr;		return nullptr;
Builder builder(getContext());		Builder builder(getContext());
return builder.getIndexTensorAttr(resultShape);		return builder.getIndexTensorAttr(resultShape);
▲ Show 20 Lines • Show All 660 Lines • Show Last 20 Lines

mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir

	Show First 20 Lines • Show All 299 Lines • ▼ Show 20 Lines
	func @broadcast(%a : tensor<?xindex>, %b : !shape.shape) -> !shape.shape {			func @broadcast(%a : tensor<?xindex>, %b : !shape.shape) -> !shape.shape {
	// CHECK: shape.broadcast			// CHECK: shape.broadcast
	%c = shape.broadcast %a, %b : tensor<?xindex>, !shape.shape -> !shape.shape			%c = shape.broadcast %a, %b : tensor<?xindex>, !shape.shape -> !shape.shape
	return %c : !shape.shape			return %c : !shape.shape
	}			}

	// -----			// -----

	// CHECK-LABEL: func @broadcast_unknown_extents(
	// CHECK-SAME: %[[LHS:.*]]: tensor<?xindex>,
	// CHECK-SAME: %[[RHS:.*]]: tensor<?xindex>) {
	func @broadcast_unknown_extents(%a : tensor<?xindex>, %b : tensor<?xindex>) {
	// CHECK: %[[C0:.*]] = constant 0 : index
	// CHECK: %[[C1:.*]] = constant 1 : index
	// CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor<?xindex>
	// CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor<?xindex>
	// CHECK: %[[LHS_RANK_ULE:.*]] = cmpi ule, %[[LHS_RANK]], %[[RHS_RANK]] : index
	// CHECK: %[[LESSER_RANK:.*]] = select %[[LHS_RANK_ULE]], %[[LHS_RANK]], %[[RHS_RANK]] : index
	// CHECK: %[[GREATER_RANK:.*]] = select %[[LHS_RANK_ULE]], %[[RHS_RANK]], %[[LHS_RANK]] : index
	// CHECK: %[[ERASED_LHS:.*]] = tensor.cast %[[LHS]] : tensor<?xindex> to tensor<?xindex>
	// CHECK: %[[ERASED_RHS:.*]] = tensor.cast %[[RHS]] : tensor<?xindex> to tensor<?xindex>
	// CHECK: %[[LESSER_RANK_OPERAND:.*]] = select %[[LHS_RANK_ULE]], %[[ERASED_LHS]], %[[ERASED_RHS]] : tensor<?xindex>
	// CHECK: %[[GREATER_RANK_OPERAND:.*]] = select %[[LHS_RANK_ULE]], %[[ERASED_RHS]], %[[ERASED_LHS]] : tensor<?xindex>
	// CHECK: %[[RANK_DIFF:.*]] = subi %[[GREATER_RANK]], %[[LESSER_RANK]] : index
	// CHECK: %[[RESULT:.*]] = tensor.generate %[[GREATER_RANK]] {
	// CHECK: ^bb0(%[[OUTPUT_DIMENSION:.*]]: index):
	// CHECK: %[[IS_UNCHALLENGED_DIMENSION:.*]] = cmpi ult, %[[OUTPUT_DIMENSION]], %[[RANK_DIFF]] : index
	// CHECK: %[[GREATER_RANK_OPERAND_EXTENT:.*]] = tensor.extract %[[GREATER_RANK_OPERAND]][%[[OUTPUT_DIMENSION]]] : tensor<?xindex>
	// CHECK: %[[OUTPUT_EXTENT:.*]] = scf.if %[[IS_UNCHALLENGED_DIMENSION]] -> (index) {
	// CHECK: scf.yield %[[GREATER_RANK_OPERAND_EXTENT]] : index
	// CHECK: } else {
	// CHECK: %[[LESSER_RANK_OPERAND_DIMENSION:.*]] = subi %[[OUTPUT_DIMENSION]], %[[RANK_DIFF]] : index
	// CHECK: %[[LESSER_RANK_OPERAND_EXTENT:.*]] = tensor.extract %[[LESSER_RANK_OPERAND]][%[[LESSER_RANK_OPERAND_DIMENSION]]] : tensor<?xindex>
	// CHECK: %[[GREATER_RANK_OPERAND_EXTENT_IS_ONE:.*]] = cmpi eq, %[[GREATER_RANK_OPERAND_EXTENT]], %[[C1]] : index
	// CHECK: %[[BROADCASTED_EXTENT:.*]] = select %[[GREATER_RANK_OPERAND_EXTENT_IS_ONE]], %[[LESSER_RANK_OPERAND_EXTENT]], %[[GREATER_RANK_OPERAND_EXTENT]] : index
	// CHECK: scf.yield %[[BROADCASTED_EXTENT]] : index
	// CHECK: }
	// CHECK: yield %[[OUTPUT_EXTENT:.*]] : index
	// CHECK: } : tensor<?xindex>
	// CHECK: return
	// CHECK: }
	%0 = shape.broadcast %a, %b
	: tensor<?xindex>, tensor<?xindex> -> tensor<?xindex>
	return
	}

	// -----

	// CHECK-LABEL: func @broadcast_known_different_extents(
	// CHECK-SAME: %[[LHS:.*]]: tensor<2xindex>,
	// CHECK-SAME: %[[RHS:.*]]: tensor<3xindex>) {
	func @broadcast_known_different_extents(%a : tensor<2xindex>, %b : tensor<3xindex>) {
	// CHECK: %[[C0:.*]] = constant 0 : index
	// CHECK: %[[C1:.*]] = constant 1 : index
	// CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor<2xindex>
	// CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor<3xindex>
	// CHECK: %[[LHS_RANK_ULE:.*]] = cmpi ule, %[[LHS_RANK]], %[[RHS_RANK]] : index
	// CHECK: %[[LESSER_RANK:.*]] = select %[[LHS_RANK_ULE]], %[[LHS_RANK]], %[[RHS_RANK]] : index
	// CHECK: %[[GREATER_RANK:.*]] = select %[[LHS_RANK_ULE]], %[[RHS_RANK]], %[[LHS_RANK]] : index
	// CHECK: %[[ERASED_LHS:.*]] = tensor.cast %[[LHS]] : tensor<2xindex> to tensor<?xindex>
	// CHECK: %[[ERASED_RHS:.*]] = tensor.cast %[[RHS]] : tensor<3xindex> to tensor<?xindex>
	// CHECK: %[[LESSER_RANK_OPERAND:.*]] = select %[[LHS_RANK_ULE]], %[[ERASED_LHS]], %[[ERASED_RHS]] : tensor<?xindex>
	// CHECK: %[[GREATER_RANK_OPERAND:.*]] = select %[[LHS_RANK_ULE]], %[[ERASED_RHS]], %[[ERASED_LHS]] : tensor<?xindex>
	// CHECK: %[[RANK_DIFF:.*]] = subi %[[GREATER_RANK]], %[[LESSER_RANK]] : index
	// CHECK: %[[RESULT:.*]] = tensor.generate %[[GREATER_RANK]] {
	// CHECK: ^bb0(%[[OUTPUT_DIMENSION:.*]]: index):
	// CHECK: %[[IS_UNCHALLENGED_DIMENSION:.*]] = cmpi ult, %[[OUTPUT_DIMENSION]], %[[RANK_DIFF]] : index
	// CHECK: %[[GREATER_RANK_OPERAND_EXTENT:.*]] = tensor.extract %[[GREATER_RANK_OPERAND]][%[[OUTPUT_DIMENSION]]] : tensor<?xindex>
	// CHECK: %[[OUTPUT_EXTENT:.*]] = scf.if %[[IS_UNCHALLENGED_DIMENSION]] -> (index) {
	// CHECK: scf.yield %[[GREATER_RANK_OPERAND_EXTENT]] : index
	// CHECK: } else {
	// CHECK: %[[LESSER_RANK_OPERAND_DIMENSION:.*]] = subi %[[OUTPUT_DIMENSION]], %[[RANK_DIFF]] : index
	// CHECK: %[[LESSER_RANK_OPERAND_EXTENT:.*]] = tensor.extract %[[LESSER_RANK_OPERAND]][%[[LESSER_RANK_OPERAND_DIMENSION]]] : tensor<?xindex>
	// CHECK: %[[GREATER_RANK_OPERAND_EXTENT_IS_ONE:.*]] = cmpi eq, %[[GREATER_RANK_OPERAND_EXTENT]], %[[C1]] : index
	// CHECK: %[[BROADCASTED_EXTENT:.*]] = select %[[GREATER_RANK_OPERAND_EXTENT_IS_ONE]], %[[LESSER_RANK_OPERAND_EXTENT]], %[[GREATER_RANK_OPERAND_EXTENT]] : index
	// CHECK: scf.yield %[[BROADCASTED_EXTENT]] : index
	// CHECK: }
	// CHECK: yield %[[OUTPUT_EXTENT:.*]] : index
	// CHECK: } : tensor<?xindex>
	// CHECK: return
	// CHECK: }
	%0 = shape.broadcast %a, %b
	: tensor<2xindex>, tensor<3xindex> -> tensor<?xindex>
	return
	}

	// -----

	func @try_is_broadcastable(%a : tensor<3xindex>, %b : tensor<?xindex>) -> i1 {			func @try_is_broadcastable(%a : tensor<3xindex>, %b : tensor<?xindex>) -> i1 {
	%0 = shape.is_broadcastable %a, %b : tensor<3xindex>, tensor<?xindex>			%0 = shape.is_broadcastable %a, %b : tensor<3xindex>, tensor<?xindex>
	return %0 : i1			return %0 : i1
	}			}

	// CHECK-LABEL: func @try_is_broadcastable(			// CHECK-LABEL: func @try_is_broadcastable(
	// CHECK-SAME: %[[LHS:.*]]: tensor<3xindex>,			// CHECK-SAME: %[[LHS:.*]]: tensor<3xindex>,
	// CHECK-SAME: %[[RHS:.*]]: tensor<?xindex>) -> i1 {			// CHECK-SAME: %[[RHS:.*]]: tensor<?xindex>) -> i1 {
	▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines
	// CHECK: %[[EITHER_EXTENT_IS_ONE:.*]] = or %[[LARGER_EXTENT_IS_ONE]], %[[SMALLER_EXTENT_IS_ONE]] : i1			// CHECK: %[[EITHER_EXTENT_IS_ONE:.*]] = or %[[LARGER_EXTENT_IS_ONE]], %[[SMALLER_EXTENT_IS_ONE]] : i1
	// CHECK: %[[OR_EXTENTS_ARE_EQUAL:.*]] = or %[[EITHER_EXTENT_IS_ONE]], %[[EXTENTS_ARE_EQUAL]] : i1			// CHECK: %[[OR_EXTENTS_ARE_EQUAL:.*]] = or %[[EITHER_EXTENT_IS_ONE]], %[[EXTENTS_ARE_EQUAL]] : i1
	// CHECK: %[[NEW_ALL_SO_FAR:.*]] = and %[[ALL_SO_FAR]], %[[OR_EXTENTS_ARE_EQUAL]] : i1			// CHECK: %[[NEW_ALL_SO_FAR:.*]] = and %[[ALL_SO_FAR]], %[[OR_EXTENTS_ARE_EQUAL]] : i1
	// CHECK: scf.yield %[[NEW_ALL_SO_FAR]] : i1			// CHECK: scf.yield %[[NEW_ALL_SO_FAR]] : i1
	// CHECK: }			// CHECK: }
	// CHECK: %[[RESULT:.*]] = shape.cstr_require %[[ALL_RESULT]], "required broadcastable shapes"			// CHECK: %[[RESULT:.*]] = shape.cstr_require %[[ALL_RESULT]], "required broadcastable shapes"
	// CHECK: return %[[RESULT]] : !shape.witness			// CHECK: return %[[RESULT]] : !shape.witness
	// CHECK: }			// CHECK: }

				// -----

				// Lowering of a three-operand shape.broadcast on extent tensors. The
				// expected IR computes the maximum rank, the per-operand rank differences,
				// and then, inside tensor.generate, folds the operands one after another
				// into a running extent that starts at the constant 1.
				func @broadcast_3_shapes_different_extents(%a : tensor<2xindex>,
				%b : tensor<3xindex>,
				%c : tensor<2xindex>) {
				// CHECK-LABEL: func @broadcast_3_shapes_different_extents(
				// CHECK-SAME: %[[ARG0:.*]]: tensor<2xindex>,
				// CHECK-SAME: %[[ARG1:.*]]: tensor<3xindex>,
				// CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>) {
				// CHECK: %[[C0:.*]] = constant 0 : index
				// CHECK: %[[RANK0:.*]] = dim %[[ARG0]], %[[C0]] : tensor<2xindex>
				// CHECK: %[[RANK1:.*]] = dim %[[ARG1]], %[[C0]] : tensor<3xindex>
				// CHECK: %[[RANK2:.*]] = dim %[[ARG2]], %[[C0]] : tensor<2xindex>
				// CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index
				// CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index
				// CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index
				// CHECK: %[[MAX_RANK:.*]] = select %[[CMP1]], %[[RANK2]], %[[LARGER_DIM]] : index
				// CHECK: %[[DIM_DIFF0:.*]] = subi %[[MAX_RANK]], %[[RANK0]] : index
				// CHECK: %[[DIM_DIFF1:.*]] = subi %[[MAX_RANK]], %[[RANK1]] : index
				// CHECK: %[[DIM_DIFF2:.*]] = subi %[[MAX_RANK]], %[[RANK2]] : index
				// CHECK: %[[RESULT:.*]] = tensor.generate %[[MAX_RANK]] {
				// CHECK: ^bb0(%[[IDX:.*]]: index):
				// CHECK: %[[C1:.*]] = constant 1 : index
				// CHECK: %[[OUTBOUNDS0:.*]] = cmpi ult, %[[IDX]], %[[DIM_DIFF0]] : index
				// CHECK: %[[DIM0:.*]] = scf.if %[[OUTBOUNDS0]] -> (index) {
				// CHECK: scf.yield %[[C1]] : index
				// CHECK: } else {
				// CHECK: %[[IDX0:.*]] = subi %[[IDX]], %[[DIM_DIFF0]] : index
				// CHECK: %[[EXTRACTED_0:.*]] = tensor.extract %[[ARG0]]{{\[}}%[[IDX0]]] : tensor<2xindex>
				// CHECK: %[[DIM0_IS_1:.*]] = cmpi eq, %[[EXTRACTED_0]], %[[C1]] : index
				// CHECK: %[[MAX_DIM0:.*]] = select %[[DIM0_IS_1]], %[[C1]], %[[EXTRACTED_0]] : index
				// CHECK: scf.yield %[[MAX_DIM0]] : index
				// CHECK: }
				// CHECK: %[[VAL_28:.*]] = cmpi ult, %[[IDX]], %[[DIM_DIFF1]] : index
				// CHECK: %[[DIM1:.*]] = scf.if %[[VAL_28]] -> (index) {
				// CHECK: scf.yield %[[DIM0]] : index
				// CHECK: } else {
				// CHECK: %[[IDX1:.*]] = subi %[[IDX]], %[[DIM_DIFF1]] : index
				// CHECK: %[[EXTRACTED_1:.*]] = tensor.extract %[[ARG1]]{{\[}}%[[IDX1]]] : tensor<3xindex>
				// CHECK: %[[DIM1_IS_1:.*]] = cmpi eq, %[[EXTRACTED_1]], %[[C1]] : index
				// CHECK: %[[MAX_DIM1:.*]] = select %[[DIM1_IS_1]], %[[DIM0]], %[[EXTRACTED_1]] : index
				// CHECK: scf.yield %[[MAX_DIM1]] : index
				// CHECK: }
				// CHECK: %[[VAL_36:.*]] = cmpi ult, %[[IDX]], %[[DIM_DIFF2]] : index
				// CHECK: %[[DIM2:.*]] = scf.if %[[VAL_36]] -> (index) {
				// CHECK: scf.yield %[[DIM1]] : index
				// CHECK: } else {
				// CHECK: %[[IDX2:.*]] = subi %[[IDX]], %[[DIM_DIFF2]] : index
				// CHECK: %[[EXTRACTED_2:.*]] = tensor.extract %[[ARG2]]{{\[}}%[[IDX2]]] : tensor<2xindex>
				// CHECK: %[[DIM2_IS_1:.*]] = cmpi eq, %[[EXTRACTED_2]], %[[C1]] : index
				// CHECK: %[[MAX_DIM2:.*]] = select %[[DIM2_IS_1]], %[[DIM1]], %[[EXTRACTED_2]] : index
				// CHECK: scf.yield %[[MAX_DIM2]] : index
				// CHECK: }
				// CHECK: tensor.yield %[[DIM2]] : index
				// CHECK: } : tensor<?xindex>
				// CHECK: return
				// CHECK: }
				%0 = shape.broadcast %a, %b, %c
				: tensor<2xindex>, tensor<3xindex>, tensor<2xindex> -> tensor<?xindex>
				return
				}

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][shape] Generalize broadcast to a variadic number of shapes
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 322376

mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td

mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp

mlir/lib/Dialect/Shape/IR/Shape.cpp

mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][shape] Generalize broadcast to a variadic number of shapesClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 322376

mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td

mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp

mlir/lib/Dialect/Shape/IR/Shape.cpp

mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir

[mlir][shape] Generalize broadcast to a variadic number of shapes
ClosedPublic