Previously, broadcast was a binary op; now it supports an arbitrary number of inputs.
The change is structured so that, for now, it is an NFC for all broadcast
operations that were previously legal.
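As a rough illustration of the op-level change (a sketch in current MLIR assembly syntax; the exact syntax and types are assumptions, not taken from this patch):

```mlir
// The previously legal binary form keeps working unchanged:
%r2 = shape.broadcast %a, %b : !shape.shape, !shape.shape -> !shape.shape

// The op now also accepts more than two operands:
%r3 = shape.broadcast %a, %b, %c
    : !shape.shape, !shape.shape, !shape.shape -> !shape.shape
```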
mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td | ||
---|---|---|
83–88 | Do we have places that use this form? | |
mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp | ||
80 | The last sentence is true for all rewrites, so redundant here. | |
90 | A comment here would be good to explain why dim is used to get the rank. Also, does dim work on a shape? (It works on a tensor, so are we guaranteed we'd be in tensor world here? See the sketch after this table.) | |
188 | Could more be reused here? E.g., is the n-ary one (excluding computing the max rank) less efficient for the binary case? Or vice versa, are multiple binary-case applications less efficient than the n-ary lowering? |
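On the dim-for-rank question above, a minimal sketch, assuming the shape has already been lowered to an extent tensor (tensor<?xindex>) whose element count is the rank; written in current upstream syntax, which may differ from the code under review:

```mlir
func.func @rank_from_extent_tensor(%extents: tensor<?xindex>) -> index {
  %c0 = arith.constant 0 : index
  // The extent tensor is 1-D; its single dimension size equals the rank of
  // the value whose shape it describes.
  %rank = tensor.dim %extents, %c0 : tensor<?xindex>
  return %rank : index
}
```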
mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp | ||
---|---|---|
115 | inBound is confusing. It is true if we are outside the bounds of the index, right? | |
130 | This sentence ends abruptly. | |
142 | You could fold this up. In the then case, return reduceDim. In the else case, do the select. | |
188 | Thinking about it, maybe doing multiple binary broadcasts in a row (in the implementation, not the op) would yield similar performance; see the sketch after this table. |
mlir/lib/Dialect/Shape/IR/Shape.cpp | ||
355 | Is this intended? |
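The chaining idea above, sketched at the op level (assumed current assembly syntax; broadcasting is associative wherever it is defined, so chained binary broadcasts should agree with the n-ary form):

```mlir
// N-ary form:
%r = shape.broadcast %a, %b, %c
    : !shape.shape, !shape.shape, !shape.shape -> !shape.shape

// Chained binary form; the same result wherever broadcasting succeeds:
%ab = shape.broadcast %a, %b : !shape.shape, !shape.shape -> !shape.shape
%r2 = shape.broadcast %ab, %c : !shape.shape, !shape.shape -> !shape.shape
```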
I'm going to wait on fixing the test until I hear your thoughts on the two options for lowering broadcast.
mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td | ||
---|---|---|
83–88 | This was the original form of the build method before making the number of inputs variadic. I thought it might be nice to still have it for the common case and to make the transition essentially an NFC. | |
mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp | ||
90 | This was guaranteed in the caller. I'll replicate the check here though in case of future uses from other locations. | |
188 | I think the performance differences between the two are negligible. We already fully unroll the tensor::GenerateOp, so they will be roughly the same. The n-ary lowering needs a starting value during its reductions (unless we make the C++ code much more complex), while the binary lowering can skip that step. On the other hand, the binary lowering might recompute a small amount of work between invocations when there are more than two inputs. Technically the n-ary case has the potential to be just as performant; I'm just not sure how clean I can make it look. I personally find the n-ary lowering easier to read, and I think Stephan has said in the past that he expected the binary case to be easier to read. I think we should choose the implementation we find easier to read and stick with it. I would like to hear your opinions on whether you agree, and which you find easier to read. |
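To make the "starting value" point concrete: per result dimension, the n-ary lowering effectively folds a max over each operand's right-aligned extent, with 1 serving both as the seed and as the contribution of operands that don't have that dimension. A hedged sketch of the per-operand combination step, using current arith syntax rather than what the patch emits inside its tensor.generate, with a made-up function name:

```mlir
// Fold one operand's contribution into the running max for a single result
// dimension. %ext is that operand's extent, %out_of_bounds is true when the
// operand is too short to have this dimension, and %acc starts at 1 (the
// seed referred to above).
func.func @combine_extent(%acc: index, %ext: index, %out_of_bounds: i1) -> index {
  %c1 = arith.constant 1 : index
  %ext_or_one = arith.select %out_of_bounds, %c1, %ext : index
  %gt = arith.cmpi ugt, %ext_or_one, %acc : index
  %max = arith.select %gt, %ext_or_one, %acc : index
  return %max : index
}
```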
mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp | ||
---|---|---|
188 | I found the binary case easier because my n-ary case looked like a mess. Looking at your code, this is much nicer, so let's ship the n-ary case only. The performance difference should be negligible. Could you also extend cstr_broadcastable accordingly? They need to be in sync, otherwise broadcast cannot really be used. |
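For reference, the constraint op would need to grow the same arity so the two stay in sync; a sketch under the assumption that cstr_broadcastable keeps its current assembly format:

```mlir
// Witness that %a, %b, and %c are all broadcastable with each other:
%w = shape.cstr_broadcastable %a, %b, %c
    : !shape.shape, !shape.shape, !shape.shape
```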
mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp | ||
---|---|---|
188 | Binary is removed. I'll extend cstr_broadcastable in a follow-up CL. |