Diff 320440

mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td

Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines	let description = [{
In case the resulting shape is undefined, i.e. if corresponding extents are		In case the resulting shape is undefined, i.e. if corresponding extents are
different from each other but none is 1, the result is an error shape.		different from each other but none is 1, the result is an error shape.
Likewise error values are propagated if any of the operands holds an error		Likewise error values are propagated if any of the operands holds an error
value. If the result type is an extent tensor (and can therefore not hold		value. If the result type is an extent tensor (and can therefore not hold
the error value) the behavior may be undefined. The optional string		the error value) the behavior may be undefined. The optional string
attribute can be used to describe the error case.		attribute can be used to describe the error case.
}];		}];

let arguments = (ins Shape_ShapeOrExtentTensorType:$lhs,		let arguments = (ins Variadic<Shape_ShapeOrExtentTensorType>:$shapes,
Shape_ShapeOrExtentTensorType:$rhs,
OptionalAttr<StrAttr>:$error);		OptionalAttr<StrAttr>:$error);
let results = (outs Shape_ShapeOrExtentTensorType:$result);		let results = (outs Shape_ShapeOrExtentTensorType:$result);

let assemblyFormat = [{		let assemblyFormat = [{
$lhs `,` $rhs attr-dict `:` type($lhs) `,` type($rhs) `->` type($result)		$shapes attr-dict `:` type($shapes) `->` type($result)
}];		}];

let verifier = [{ return ::verifyShapeOrExtentTensorOp(*this); }];		let builders = [OpBuilderDAG<(ins "::mlir::Type":$result,
		jpienaarUnsubmitted Done Reply Inline Actions Do we have places that use this form? jpienaar: Do we have places that use this form?
		tpoppAuthorUnsubmitted Done Reply Inline Actions This was the original form of the build method before making the number of inputs variadic. I thought it might be nice to still have it for the common case and to make the transition essentially an NFC tpopp: This was the original form of the build method before making the number of inputs variadic. I…
let hasFolder = 1;		"::mlir::Value":$lhs, "::mlir::Value":$rhs,
		"/optional/ ::mlir::StringAttr":$error), [{
		build($_builder, $_state, result, ::llvm::makeArrayRef({lhs, rhs}), error);
		}]>
		];

let verifier = [{ return ::verifyShapeOrExtentTensorOp(*this); }];		let hasFolder = 1;
		let verifier = [{
		return success(succeeded(::verifyShapeOrExtentTensorOp(*this)) &&
		getNumOperands() >= 2);
		}];
}		}

def Shape_ConstShapeOp : Shape_Op<"const_shape", [ConstantLike, NoSideEffect]> {		def Shape_ConstShapeOp : Shape_Op<"const_shape", [ConstantLike, NoSideEffect]> {
let summary = "Creates a constant shape or extent tensor";		let summary = "Creates a constant shape or extent tensor";
let description = [{		let description = [{
Creates a constant shape or extent tensor. The individual extents are given		Creates a constant shape or extent tensor. The individual extents are given
as the `shape` attribute. The number of these values equals the shape's		as the `shape` attribute. The number of these values equals the shape's
rank.		rank.
▲ Show 20 Lines • Show All 750 Lines • Show Last 20 Lines

mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp

	//===- ShapeToStandard.cpp - conversion from Shape to Standard dialect ----===//			//===- ShapeToStandard.cpp - conversion from Shape to Standard dialect ----===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h"			#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h"

	#include "../PassDetail.h"			#include "../PassDetail.h"
	#include "mlir/Dialect/SCF/SCF.h"			#include "mlir/Dialect/SCF/SCF.h"
	#include "mlir/Dialect/Shape/IR/Shape.h"			#include "mlir/Dialect/Shape/IR/Shape.h"
	#include "mlir/Dialect/StandardOps/IR/Ops.h"			#include "mlir/Dialect/StandardOps/IR/Ops.h"
	#include "mlir/Dialect/Tensor/IR/Tensor.h"			#include "mlir/Dialect/Tensor/IR/Tensor.h"
	#include "mlir/IR/BlockAndValueMapping.h"			#include "mlir/IR/BlockAndValueMapping.h"
				#include "mlir/IR/ImplicitLocOpBuilder.h"
	#include "mlir/Transforms/DialectConversion.h"			#include "mlir/Transforms/DialectConversion.h"
				#include "llvm/ADT/STLExtras.h"

	using namespace mlir;			using namespace mlir;
	using namespace mlir::shape;			using namespace mlir::shape;
	using namespace mlir::scf;			using namespace mlir::scf;

	/// Conversion patterns.			/// Conversion patterns.
	namespace {			namespace {
	class AnyOpConversion : public OpConversionPattern<AnyOp> {			class AnyOpConversion : public OpConversionPattern<AnyOp> {
	▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
	namespace {			namespace {
	struct BroadcastOpConverter : public OpConversionPattern<BroadcastOp> {			struct BroadcastOpConverter : public OpConversionPattern<BroadcastOp> {
	using OpConversionPattern<BroadcastOp>::OpConversionPattern;			using OpConversionPattern<BroadcastOp>::OpConversionPattern;

	LogicalResult			LogicalResult
	matchAndRewrite(BroadcastOp op, ArrayRef<Value> operands,			matchAndRewrite(BroadcastOp op, ArrayRef<Value> operands,
	ConversionPatternRewriter &rewriter) const override;			ConversionPatternRewriter &rewriter) const override;
	};			};
	} // namespace

	LogicalResult BroadcastOpConverter::matchAndRewrite(			// The generic lowering of Broadcast given any number of inputs. It assumes all
	BroadcastOp op, ArrayRef<Value> operands,			// verification was done before being called.
				jpienaarUnsubmitted Done Reply Inline Actions The last sentence is true for all rewrites, so redundant here. jpienaar: The last sentence is true for all rewrites, so redundant here.
	ConversionPatternRewriter &rewriter) const {			Operation::result_range naryBroadcastLowering(BroadcastOp::Adaptor &op,
	// For now, this lowering is only defined on `tensor<?xindex>` operands, not			ImplicitLocOpBuilder lb) {
	// on shapes.			Value zero = lb.create<ConstantIndexOp>(0);
	if (op.getType().isa<ShapeType>())			Value one = lb.create<ConstantIndexOp>(1);
	return failure();			Type indexTy = lb.getIndexType();

				// Save all the ranks for bounds checking
				SmallVector<Value> ranks, rankDiffs;
				llvm::append_range(ranks, llvm::map_range(op.shapes(), [&](Value v) {
				return lb.create<DimOp>(v, zero);
				jpienaarUnsubmitted Done Reply Inline Actions A comment here would be good to explain why dim is used to get rank. Also does dim work on a shape? (Tensor yes, and so are we guaranteed we'd be in tensor world here?) jpienaar: A comment here would be good to explain why dim is used to get rank. Also does dim work on a…
				tpoppAuthorUnsubmitted Done Reply Inline Actions This was guaranteed in the caller. I'll replicate the check here though in case of future uses from other locations. tpopp: This was guaranteed in the caller. I'll replicate the check here though in case of future uses…
				}));

				// Find the maximum rank
				Value maxRank = ranks.front();
				for (Value v : llvm::drop_begin(ranks, 1)) {
				Value rankIsGreater = lb.create<CmpIOp>(CmpIPredicate::ugt, v, maxRank);
				maxRank = lb.create<SelectOp>(rankIsGreater, v, maxRank);
				}

				// Calculate the difference of ranks and the maximum rank for later offsets.
				llvm::append_range(rankDiffs, llvm::map_range(ranks, [&](Value v) {
				return lb.create<SubIOp>(indexTy, maxRank, v);
				}));

				return lb
				.create<tensor::GenerateOp>(
				getExtentTensorType(lb.getContext()), ValueRange{maxRank},
				[&](OpBuilder &b, Location loc, ValueRange args) {
				Value outputDimension = args[0];

	assert(!op.lhs().getType().isa<ShapeType>() &&			Value reduceDim = one;
	!op.rhs().getType().isa<ShapeType>());			for (auto tup : llvm::zip(op.shapes(), rankDiffs)) {
	auto loc = op.getLoc();			Value shape = std::get<0>(tup);
	BroadcastOp::Adaptor transformed(operands);			Value rankDiff = std::get<1>(tup);
	Value zero = rewriter.create<ConstantIndexOp>(loc, 0);			Value inBound = b.create<CmpIOp>(loc, CmpIPredicate::ult,
				herhutUnsubmitted Done Reply Inline Actions `inBound` is confusing. It is true if we are outside the bounds of the index, right? herhut: `inBound` is confusing. It is true if we are outside the bounds of the index, right?
	Value one = rewriter.create<ConstantIndexOp>(loc, 1);			outputDimension, rankDiff);
				Value dim =
				b.create<IfOp>(
				loc, TypeRange{indexTy}, inBound,
				[&](OpBuilder &b, Location loc) {
				b.create<scf::YieldOp>(loc, one);
				},
				[&](OpBuilder &b, Location loc) {
				// The broadcasting logic is:
				// - if one extent (here we arbitrarily choose the
				// extent from the greater-rank operand) is equal to 1,
				// then take the extent from the other operand
				// - otherwise, take the extent as-is.
				// Note that this logic remains correct in the presence
				// of dimensions of zero extent.
				herhutUnsubmitted Done Reply Inline Actions This sentence ends abruptly. herhut: This sentence ends abruptly.
				Value lesserRankOperandDimension = b.create<SubIOp>(
				loc, indexTy, outputDimension, rankDiff);
				Value lesserRankOperandExtent =
				b.create<tensor::ExtractOp>(
				loc, shape,
				ValueRange{lesserRankOperandDimension});
				b.create<scf::YieldOp>(loc, lesserRankOperandExtent);
				})
				.getResult(0);

				// Always give preference to a possibly non-1 extent
				Value dimIsOne =
				herhutUnsubmitted Done Reply Inline Actions You could fold this up. In the then case, return `reduceDim`. In the else case, do the select. herhut: You could fold this up. In the then case, return `reduceDim`. In the else case, do the select.
				b.create<CmpIOp>(loc, CmpIPredicate::eq, dim, one);
				reduceDim = b.create<SelectOp>(loc, dimIsOne, reduceDim, dim);
				}

				b.create<tensor::YieldOp>(loc, reduceDim);
				})
				->getResults();
				}

				herhutUnsubmitted Done Reply Inline Actions This comment seems lonely. herhut: This comment seems lonely.
				tpoppAuthorUnsubmitted Done Reply Inline Actions Fixed tpopp: Fixed
				// The specialized lowering for the common binary broadcast case. This is
				// slightly more efficient and arguably easier to read. It assumes all
				// verification was done before being called.
				Operation::result_range binaryBroadcastLowering(BroadcastOp::Adaptor &op,
				ImplicitLocOpBuilder lb) {
				Value zero = lb.create<ConstantIndexOp>(0);
				Value one = lb.create<ConstantIndexOp>(1);
				Type indexTy = lb.getIndexType();

	// Find smaller and greater rank and extent tensor.			// Find smaller and greater rank and extent tensor.
	Value lhsRank = rewriter.create<DimOp>(loc, op.lhs(), zero);			Value lhsRank = lb.create<DimOp>(op.shapes()[0], zero);
	Value rhsRank = rewriter.create<DimOp>(loc, op.rhs(), zero);			Value rhsRank = lb.create<DimOp>(op.shapes()[1], zero);
	Value lhsRankULE =			Value lhsRankULE = lb.create<CmpIOp>(CmpIPredicate::ule, lhsRank, rhsRank);
	rewriter.create<CmpIOp>(loc, CmpIPredicate::ule, lhsRank, rhsRank);			Value lesserRank = lb.create<SelectOp>(lhsRankULE, lhsRank, rhsRank);
	Type indexTy = rewriter.getIndexType();			Value greaterRank = lb.create<SelectOp>(lhsRankULE, rhsRank, lhsRank);
	Value lesserRank =
	rewriter.create<SelectOp>(loc, lhsRankULE, lhsRank, rhsRank);
	Value greaterRank =
	rewriter.create<SelectOp>(loc, lhsRankULE, rhsRank, lhsRank);
	auto erasedRankType =			auto erasedRankType =
	RankedTensorType::get({ShapedType::kDynamicSize}, indexTy);			RankedTensorType::get({ShapedType::kDynamicSize}, indexTy);
	Value rankErasedLhs =			Value rankErasedLhs =
	rewriter.create<tensor::CastOp>(loc, erasedRankType, transformed.lhs());			lb.create<tensor::CastOp>(erasedRankType, op.shapes()[0]);
	Value rankErasedRhs =			Value rankErasedRhs =
	rewriter.create<tensor::CastOp>(loc, erasedRankType, transformed.rhs());			lb.create<tensor::CastOp>(erasedRankType, op.shapes()[1]);
	Value lesserRankOperand =			Value lesserRankOperand =
	rewriter.create<SelectOp>(loc, lhsRankULE, rankErasedLhs, rankErasedRhs);			lb.create<SelectOp>(lhsRankULE, rankErasedLhs, rankErasedRhs);
	Value greaterRankOperand =			Value greaterRankOperand =
	rewriter.create<SelectOp>(loc, lhsRankULE, rankErasedRhs, rankErasedLhs);			lb.create<SelectOp>(lhsRankULE, rankErasedRhs, rankErasedLhs);

	Value rankDiff =			Value rankDiff = lb.create<SubIOp>(indexTy, greaterRank, lesserRank);
	rewriter.create<SubIOp>(loc, indexTy, greaterRank, lesserRank);			return lb
	rewriter.replaceOpWithNewOp<tensor::GenerateOp>(			.create<tensor::GenerateOp>(
	op, getExtentTensorType(op.getContext()), ValueRange{greaterRank},			getExtentTensorType(lb.getContext()), ValueRange{greaterRank},
	[&](OpBuilder &b, Location loc, ValueRange args) {			[&](OpBuilder &b, Location loc, ValueRange args) {
	Value outputDimension = args[0];			Value outputDimension = args[0];
	Value isUnchallengedDimension = b.create<CmpIOp>(			Value isUnchallengedDimension = b.create<CmpIOp>(
	loc, CmpIPredicate::ult, outputDimension, rankDiff);			loc, CmpIPredicate::ult, outputDimension, rankDiff);
	Value greaterRankOperandExtent = b.create<tensor::ExtractOp>(			Value greaterRankOperandExtent = b.create<tensor::ExtractOp>(
	loc, greaterRankOperand, outputDimension);			loc, greaterRankOperand, outputDimension);
	// The initial dimensions of the greater-rank operand are unchallenged,			// The initial dimensions of the greater-rank operand are
				jpienaarUnsubmitted Done Reply Inline Actions Could more be reused here? E.g., is the nary one (excluding computing the max rank) less efficient for the binary case? Or vice versa, is multiple binary case applications less efficient than nary lowering? jpienaar: Could more be reused here? E.g., is the nary one (excluding computing the max rank) less…
				herhutUnsubmitted Done Reply Inline Actions Thinking of it, maybe doing multiple 2d broadcasts in a row (in the implementation, not the op) would yield similar performance. herhut: Thinking of it, maybe doing multiple 2d broadcasts in a row (in the implementation, not the op)…
				tpoppAuthorUnsubmitted Done Reply Inline Actions I think performance differences between the two are negligible. We already fully unroll the `tensor::GenerateOp`, so they will be roughly the same. For the binary case, we need a starting value during reductions in the nary lowering (without making the c++ code much more complex) while the binary case can skip that step. On the other hand, the binary case might recompute a small amount of work between each invocation for more than 2 inputs. Technically the nary case has the potential to be just as performant. I'm just not sure how clean I can make it look. I personally find the n-ary lowering to be easier to read, and I think Stephan has in the past said he expected the binary case was easier to read. I think we should choose the implementation that we find easier to read and stick with that. I would like to hear your opinions on if you agree or not, and which you find easier to read. tpopp: I think performance differences between the two are negligible. We already fully unroll the…
				herhutUnsubmitted Done Reply Inline Actions I found the binary case easier because my n-ary case looked like a mess. Looking at your code, this is much nicer. So let's ship the n-ary case only. The performance difference should be negligible. Could you also extend the `cstr_broadcastable` accordingly? They need to be in sync otherwise `broadcast` cannot really be used. herhut: I found the binary case easier because my n-ary case looked like a mess. Looking at your code…
				tpoppAuthorUnsubmitted Done Reply Inline Actions Binary is removed. I'll extend `cstr_broadcastable` in a follow up CL. tpopp: Binary is removed. I'll extend `cstr_broadcastable` in a follow up CL.
	// so we can take them as-is. Otherwise, we need to do a comparison.			// unchallenged, so we can take them as-is. Otherwise, we need to do
	// We need an actual branch here (instead of a select) because the			// a comparison. We need an actual branch here (instead of a select)
	// lesser-rank operand might be rank 0, so any tensor.extract would be			// because the lesser-rank operand might be rank 0, so any
	// invalid.			// tensor.extract would be invalid.
	auto ifOp = b.create<IfOp>(			auto ifOp = b.create<IfOp>(
	loc, TypeRange{indexTy}, isUnchallengedDimension,			loc, TypeRange{indexTy}, isUnchallengedDimension,
	[&](OpBuilder &b, Location loc) {			[&](OpBuilder &b, Location loc) {
	b.create<scf::YieldOp>(loc, greaterRankOperandExtent);			b.create<scf::YieldOp>(loc, greaterRankOperandExtent);
	},			},
	[&](OpBuilder &b, Location loc) {			[&](OpBuilder &b, Location loc) {
	// The broadcasting logic is:			// The broadcasting logic is:
	// - if one extent (here we arbitrarily choose the extent from			// - if one extent (here we arbitrarily choose the extent from
	// the greater-rank operand) is equal to 1, then take the extent			// the greater-rank operand) is equal to 1, then take the
	// from the other operand			// extent from the other operand
	// - otherwise, take the extent as-is.			// - otherwise, take the extent as-is.
	// Note that this logic remains correct in the presence of			// Note that this logic remains correct in the presence of
	// dimensions of zero extent.			// dimensions of zero extent.
	Value lesserRankOperandDimension =			Value lesserRankOperandDimension =
	b.create<SubIOp>(loc, indexTy, outputDimension, rankDiff);			b.create<SubIOp>(loc, indexTy, outputDimension, rankDiff);
	Value lesserRankOperandExtent = b.create<tensor::ExtractOp>(			Value lesserRankOperandExtent = b.create<tensor::ExtractOp>(
	loc, lesserRankOperand,			loc, lesserRankOperand,
	ValueRange{lesserRankOperandDimension});			ValueRange{lesserRankOperandDimension});
	Value greaterRankOperandExtentIsOne = b.create<CmpIOp>(			Value greaterRankOperandExtentIsOne = b.create<CmpIOp>(
	loc, CmpIPredicate::eq, greaterRankOperandExtent, one);			loc, CmpIPredicate::eq, greaterRankOperandExtent, one);
	Value broadcastedExtent = b.create<SelectOp>(			Value broadcastedExtent = b.create<SelectOp>(
	loc, greaterRankOperandExtentIsOne, lesserRankOperandExtent,			loc, greaterRankOperandExtentIsOne,
	greaterRankOperandExtent);			lesserRankOperandExtent, greaterRankOperandExtent);
	b.create<scf::YieldOp>(loc, broadcastedExtent);			b.create<scf::YieldOp>(loc, broadcastedExtent);
	});			});
	b.create<tensor::YieldOp>(loc, ifOp.getResult(0));			b.create<tensor::YieldOp>(loc, ifOp.getResult(0));
	});			})
				->getResults();
				}

				} // namespace

				LogicalResult BroadcastOpConverter::matchAndRewrite(
				BroadcastOp op, ArrayRef<Value> operands,
				ConversionPatternRewriter &rewriter) const {
				// For now, this lowering is only defined on `tensor<?xindex>` operands, not
				// on shapes.
				if (op.getType().isa<ShapeType>())
				return failure();

				BroadcastOp::Adaptor transformed(operands);
				assert(llvm::all_of(transformed.shapes(),
				[](Value v) { return !v.getType().isa<ShapeType>(); }));

				auto loc = op.getLoc();
				ImplicitLocOpBuilder lb(loc, rewriter);

				// Specialize the 2-input case as it is slightly optimized and might be
				// slightly easier to read.
				if (transformed.shapes().size() == 2) {
				rewriter.replaceOp(op, binaryBroadcastLowering(transformed, lb));
				} else {
				rewriter.replaceOp(op, naryBroadcastLowering(transformed, lb));
				}
				herhutUnsubmitted Not Done Reply Inline Actions Does it still make sense to have this in an extra function? herhut: Does it still make sense to have this in an extra function?
				tpoppAuthorUnsubmitted Done Reply Inline Actions I refactored to have only part of this function separate as it's convenient or use in the broadcastable follow up tpopp: I refactored to have only part of this function separate as it's convenient or use in the…
	return success();			return success();
	}			}

	namespace {			namespace {
	class ConstShapeOpConverter : public OpConversionPattern<ConstShapeOp> {			class ConstShapeOpConverter : public OpConversionPattern<ConstShapeOp> {
	public:			public:
	using OpConversionPattern<ConstShapeOp>::OpConversionPattern;			using OpConversionPattern<ConstShapeOp>::OpConversionPattern;

	▲ Show 20 Lines • Show All 464 Lines • Show Last 20 Lines

mlir/lib/Dialect/Shape/IR/Shape.cpp

Show First 20 Lines • Show All 346 Lines • ▼ Show 20 Lines	if (op.getNumOperands() == 0)
return op.emitOpError("no operands specified");		return op.emitOpError("no operands specified");

return success();		return success();
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// BroadcastOp		// BroadcastOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
		//
		herhutUnsubmitted Done Reply Inline Actions Is this intended? herhut: Is this intended?
OpFoldResult BroadcastOp::fold(ArrayRef<Attribute> operands) {		OpFoldResult BroadcastOp::fold(ArrayRef<Attribute> operands) {
if (!operands[1])		if (!operands[1])
return nullptr;		return nullptr;

		// TODO: Support folding with more than 2 input shapes
		if (operands.size() > 2 && !operands[2].isa<StringAttr>())
		return nullptr;

auto rhsShape = llvm::to_vector<6>(		auto rhsShape = llvm::to_vector<6>(
operands[1].cast<DenseIntElementsAttr>().getValues<int64_t>());		operands[1].cast<DenseIntElementsAttr>().getValues<int64_t>());
if (rhsShape.empty())		if (rhsShape.empty())
return lhs();		return shapes()[0];

if (!operands[0])		if (!operands[0])
return nullptr;		return nullptr;

auto lhsShape = llvm::to_vector<6>(		auto lhsShape = llvm::to_vector<6>(
operands[0].cast<DenseIntElementsAttr>().getValues<int64_t>());		operands[0].cast<DenseIntElementsAttr>().getValues<int64_t>());
if (lhsShape.empty())		if (lhsShape.empty())
return rhs();		return shapes()[1];

SmallVector<int64_t, 6> resultShape;		SmallVector<int64_t, 6> resultShape;
// If the shapes are not compatible, we can't fold it.		// If the shapes are not compatible, we can't fold it.
// TODO: Fold to an "error".		// TODO: Fold to an "error".
if (!OpTrait::util::getBroadcastedShape(lhsShape, rhsShape, resultShape))		if (!OpTrait::util::getBroadcastedShape(lhsShape, rhsShape, resultShape))
return nullptr;		return nullptr;
Builder builder(getContext());		Builder builder(getContext());
return builder.getIndexTensorAttr(resultShape);		return builder.getIndexTensorAttr(resultShape);
▲ Show 20 Lines • Show All 660 Lines • Show Last 20 Lines

mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir

	Show First 20 Lines • Show All 453 Lines • ▼ Show 20 Lines
	// CHECK: %[[EITHER_EXTENT_IS_ONE:.*]] = or %[[LARGER_EXTENT_IS_ONE]], %[[SMALLER_EXTENT_IS_ONE]] : i1			// CHECK: %[[EITHER_EXTENT_IS_ONE:.*]] = or %[[LARGER_EXTENT_IS_ONE]], %[[SMALLER_EXTENT_IS_ONE]] : i1
	// CHECK: %[[OR_EXTENTS_ARE_EQUAL:.*]] = or %[[EITHER_EXTENT_IS_ONE]], %[[EXTENTS_ARE_EQUAL]] : i1			// CHECK: %[[OR_EXTENTS_ARE_EQUAL:.*]] = or %[[EITHER_EXTENT_IS_ONE]], %[[EXTENTS_ARE_EQUAL]] : i1
	// CHECK: %[[NEW_ALL_SO_FAR:.*]] = and %[[ALL_SO_FAR]], %[[OR_EXTENTS_ARE_EQUAL]] : i1			// CHECK: %[[NEW_ALL_SO_FAR:.*]] = and %[[ALL_SO_FAR]], %[[OR_EXTENTS_ARE_EQUAL]] : i1
	// CHECK: scf.yield %[[NEW_ALL_SO_FAR]] : i1			// CHECK: scf.yield %[[NEW_ALL_SO_FAR]] : i1
	// CHECK: }			// CHECK: }
	// CHECK: %[[RESULT:.*]] = shape.cstr_require %[[ALL_RESULT]], "required broadcastable shapes"			// CHECK: %[[RESULT:.*]] = shape.cstr_require %[[ALL_RESULT]], "required broadcastable shapes"
	// CHECK: return %[[RESULT]] : !shape.witness			// CHECK: return %[[RESULT]] : !shape.witness
	// CHECK: }			// CHECK: }

				// -----

				// Checks the n-ary lowering of `shape.broadcast` for three extent tensors of
				// differing static sizes: ranks are read with `dim`, the maximum rank is
				// selected, and each output extent is reduced across all operands inside a
				// `tensor.generate` body.
				func @broadcast_3_shapes_different_extents(%a : tensor<2xindex>,
				%b : tensor<3xindex>,
				%c : tensor<2xindex>) {
				// CHECK-LABEL: func @broadcast_3_shapes_different_extents(
				// CHECK-SAME: %[[ARG0:.*]]: tensor<2xindex>,
				// CHECK-SAME: %[[ARG1:.*]]: tensor<3xindex>,
				// CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>) {
				// CHECK: %[[C0:.*]] = constant 0 : index
				// CHECK: %[[C1:.*]] = constant 1 : index
				// CHECK: %[[RANK0:.*]] = dim %[[ARG0]], %[[C0]] : tensor<2xindex>
				// CHECK: %[[RANK1:.*]] = dim %[[ARG1]], %[[C0]] : tensor<3xindex>
				// CHECK: %[[RANK2:.*]] = dim %[[ARG2]], %[[C0]] : tensor<2xindex>
				// CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index
				// CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index
				// CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index
				// CHECK: %[[MAX_RANK:.*]] = select %[[CMP1]], %[[RANK2]], %[[LARGER_DIM]] : index
				// CHECK: %[[DIM_DIFF0:.*]] = subi %[[MAX_RANK]], %[[RANK0]] : index
				// CHECK: %[[DIM_DIFF1:.*]] = subi %[[MAX_RANK]], %[[RANK1]] : index
				// CHECK: %[[DIM_DIFF2:.*]] = subi %[[MAX_RANK]], %[[RANK2]] : index
				// CHECK: %[[RESULT:.*]] = tensor.generate %[[MAX_RANK]] {
				// CHECK: ^bb0(%[[IDX:.*]]: index):
				// CHECK: %[[OUTBOUNDS0:.*]] = cmpi ult, %[[IDX]], %[[DIM_DIFF0]] : index
				// CHECK: %[[DIM0:.*]] = scf.if %[[OUTBOUNDS0]] -> (index) {
				// CHECK: scf.yield %[[C1]] : index
				// CHECK: } else {
				// CHECK: %[[IDX0:.*]] = subi %[[IDX]], %[[DIM_DIFF0]] : index
				// CHECK: %[[VAL_23:.*]] = tensor.extract %[[ARG0]]{{\[}}%[[IDX0]]] : tensor<2xindex>
				// CHECK: scf.yield %[[VAL_23]] : index
				// CHECK: }
				// The comparison must use the extent produced by the preceding scf.if, so
				// DIM0 is referenced here rather than re-captured.
				// CHECK: %[[DIM0_IS_1:.*]] = cmpi eq, %[[DIM0]], %[[C1]] : index
				// CHECK: %[[MAX_DIM0:.*]] = select %[[DIM0_IS_1]], %[[C1]], %[[DIM0]] : index
				// CHECK: %[[VAL_28:.*]] = cmpi ult, %[[IDX]], %[[DIM_DIFF1]] : index
				// CHECK: %[[DIM1:.*]] = scf.if %[[VAL_28]] -> (index) {
				// CHECK: scf.yield %[[C1]] : index
				// CHECK: } else {
				// CHECK: %[[IDX1:.*]] = subi %[[IDX]], %[[DIM_DIFF1]] : index
				// CHECK: %[[VAL_31:.*]] = tensor.extract %[[ARG1]]{{\[}}%[[IDX1]]] : tensor<3xindex>
				// CHECK: scf.yield %[[VAL_31]] : index
				// CHECK: }
				// CHECK: %[[DIM1_IS_1:.*]] = cmpi eq, %[[DIM1]], %[[C1]] : index
				// CHECK: %[[MAX_DIM1:.*]] = select %[[DIM1_IS_1]], %[[MAX_DIM0]], %[[DIM1]] : index
				// CHECK: %[[VAL_36:.*]] = cmpi ult, %[[IDX]], %[[DIM_DIFF2]] : index
				// CHECK: %[[DIM2:.*]] = scf.if %[[VAL_36]] -> (index) {
				// CHECK: scf.yield %[[C1]] : index
				// CHECK: } else {
				// CHECK: %[[IDX2:.*]] = subi %[[IDX]], %[[DIM_DIFF2]] : index
				// CHECK: %[[VAL_39:.*]] = tensor.extract %[[ARG2]]{{\[}}%[[IDX2]]] : tensor<2xindex>
				// CHECK: scf.yield %[[VAL_39]] : index
				// CHECK: }
				// CHECK: %[[DIM2_IS_1:.*]] = cmpi eq, %[[DIM2]], %[[C1]] : index
				// CHECK: %[[MAX_DIM2:.*]] = select %[[DIM2_IS_1]], %[[MAX_DIM1]], %[[DIM2]] : index
				// CHECK: tensor.yield %[[MAX_DIM2]] : index
				// CHECK: } : tensor<?xindex>
				// CHECK: return
				// CHECK: }
				%0 = shape.broadcast %a, %b, %c
				: tensor<2xindex>, tensor<3xindex>, tensor<2xindex> -> tensor<?xindex>
				return
				}

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][shape] Generalize broadcast to a variadic number of shapes
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 320440

mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td

mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp

mlir/lib/Dialect/Shape/IR/Shape.cpp

mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][shape] Generalize broadcast to a variadic number of shapesClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 320440

mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td

mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp

mlir/lib/Dialect/Shape/IR/Shape.cpp

mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir

[mlir][shape] Generalize broadcast to a variadic number of shapes
ClosedPublic