Diff 331719

mlir/include/mlir/Conversion/TosaToStandard/TosaToStandard.h

	Show All 17 Lines
	namespace mlir {			namespace mlir {
	namespace tosa {			namespace tosa {

	std::unique_ptr<Pass> createTosaToStandard();			std::unique_ptr<Pass> createTosaToStandard();

	void populateTosaToStandardConversionPatterns(			void populateTosaToStandardConversionPatterns(
	MLIRContext context, OwningRewritePatternList patterns);			MLIRContext context, OwningRewritePatternList patterns);

				/// Populates `patterns` with only the pattern that lowers tosa.apply_scale
				/// to standard ops (ApplyScaleOpConverter); the const and slice lowerings
				/// registered by populateTosaToStandardConversionPatterns are not added.
				void populateTosaRescaleToStandardConversionPatterns(
				MLIRContext context, OwningRewritePatternList patterns);

	/// Populates passes to convert from TOSA to Standard.			/// Populates passes to convert from TOSA to Standard.
	void addTosaToStandardPasses(OpPassManager &pm);			void addTosaToStandardPasses(OpPassManager &pm);

	} // namespace tosa			} // namespace tosa
	} // namespace mlir			} // namespace mlir

	#endif // MLIR_CONVERSION_TOSATOSTANDARD_TOSATOSTANDARD_H			#endif // MLIR_CONVERSION_TOSATOSTANDARD_TOSATOSTANDARD_H

mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td

Show First 20 Lines • Show All 1,488 Lines • ▼ Show 20 Lines	let arguments = (ins
BoolAttr:$per_channel		BoolAttr:$per_channel
);		);

let results = (outs		let results = (outs
Tosa_TensorUpto4D:$output		Tosa_TensorUpto4D:$output
);		);
}		}

		def Tosa_ApplyScaleOp: Tosa_Op<"apply_scale", [NoSideEffect] # ElementwiseMappable.traits> {
		silvasUnsubmitted Done Reply Inline Actions Apply ElementwiseMappable traits. (requires allowing vectors and tensors as valid arguments, but that gives us linalg vectorization for free). silvas: Apply ElementwiseMappable traits. (requires allowing vectors and tensors as valid arguments…
		let summary = "Rescale scalar operator for Tosa tensor operators";

		let description = [{
		Applies rescaling for fixed point values. This behavior is replicated in
		multiple quantized operations (mul, convolution, rescale, matmul, pooling).

		The commonplace implementation is to use i64 operations to avoid integer
		silvasUnsubmitted Not Done Reply Inline Actions I would prefer if this op was defined such that the body of the linalg op we produce contains a single op. It seems arbitrary to decompose part of the op as part of tosa->linalg and part in tosa->std. We want to preserve as much information as possible for the specialized code paths for neon / etc. silvas: I would prefer if this op was defined such that the body of the linalg op we produce contains a…
		rsudermanAuthorUnsubmitted Done Reply Inline Actions So ApplyScale has a specific meaning in the TOSA specification. The additional work covers handling zero-points and clipping, which are mostly unique to Rescale and should already be handled well by our vectorization library. rsuderman: So ApplyScale has a specific meaning in the TOSA specification. The additional work covers…
		overflow, but target specific implementations can use native operations to
		avoid wider than necessary types.
		}];

		// Operands may be scalars, vectors, or tensors (the *Like constraints);
		// `double_round` selects the extra rounding step for large shifts.
		let arguments = (ins
		Tosa_Int32Like:$value,
		Tosa_Int32Like:$multiplier,
		Tosa_Int8Like:$shift,
		BoolAttr:$double_round
		);

		// The result uses the same scalar/vector/tensor "Like" constraint as the
		// operands so that the elementwise-mappable traits can be satisfied when
		// the op is applied to vectors or tensors.
		let results = (outs
		Tosa_Int32Like:$output
		);
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// TOSA Spec Section 2.13		// TOSA Spec Section 2.13
// Operator Class: Data Node Ops.		// Operator Class: Data Node Ops.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Operator: const		// Operator: const
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
▲ Show 20 Lines • Show All 198 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td

	Show First 20 Lines • Show All 122 Lines • ▼ Show 20 Lines
	def Tosa_Tensor1Dto4D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4]>;			def Tosa_Tensor1Dto4D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4]>;
	def Tosa_Tensor1Dto5D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4,5]>;			def Tosa_Tensor1Dto5D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4,5]>;
	def Tosa_Tensor1Dto6D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4,5,6]>;			def Tosa_Tensor1Dto6D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4,5,6]>;

	def Tosa_TensorUpto4D : TensorRankOf<[Tosa_AnyNumber], [0,1,2,3,4]>;			def Tosa_TensorUpto4D : TensorRankOf<[Tosa_AnyNumber], [0,1,2,3,4]>;
	def Tosa_TensorUpto6D : TensorRankOf<[Tosa_AnyNumber], [0,1,2,3,4,5,6]>;			def Tosa_TensorUpto6D : TensorRankOf<[Tosa_AnyNumber], [0,1,2,3,4,5,6]>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
				// Generic scalar, vector, or tensor of a particular type.
				//===----------------------------------------------------------------------===//

				// Constraint satisfied by a value whose type is one of `types`, or a
				// vector or tensor of one of `types`. `description` is forwarded as the
				// constraint's summary so each instantiation reports its own text.
				class Tosa_TypeLike<list<Type> types, string description = ""> : TypeConstraint<Or<[
				AnyTypeOf<types>.predicate,
				VectorOf<types>.predicate,
				TensorOf<types>.predicate]>,
				description>;

				def Tosa_Int8Like : Tosa_TypeLike<[Tosa_Int8], "signless-integer-8-bit-like">;
				def Tosa_Int16Like : Tosa_TypeLike<[Tosa_Int16], "signless-integer-16-bit-like">;
				def Tosa_Int32Like : Tosa_TypeLike<[Tosa_Int32], "signless-integer-32-bit-like">;
				def Tosa_Int64Like : Tosa_TypeLike<[Tosa_Int64], "signless-integer-64-bit-like">;

				//===----------------------------------------------------------------------===//
	// Attribute predicates and classes.			// Attribute predicates and classes.
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	class ArrayMaxCt<int n> : AttrConstraint<			class ArrayMaxCt<int n> : AttrConstraint<
	CPred<"$_self.cast<::mlir::ArrayAttr>().size() <= " # n>,			CPred<"$_self.cast<::mlir::ArrayAttr>().size() <= " # n>,
	"with at least " # n # " elements">;			"with at least " # n # " elements">;

	def Tosa_IntArrayAttr2 : Confined<I64ArrayAttr, [ArrayCount<2>]>;			def Tosa_IntArrayAttr2 : Confined<I64ArrayAttr, [ArrayCount<2>]>;
	def Tosa_IntArrayAttr3 : Confined<I64ArrayAttr, [ArrayCount<3>]>;			def Tosa_IntArrayAttr3 : Confined<I64ArrayAttr, [ArrayCount<3>]>;
	Show All 25 Lines

mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp

Show All 26 Lines

static SmallVector<StringRef> getNParallelLoopsAttrs(unsigned nParallelLoops) {		static SmallVector<StringRef> getNParallelLoopsAttrs(unsigned nParallelLoops) {
return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());		return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
}		}

template <typename T>		template <typename T>
static mlir::ConstantOp		static mlir::ConstantOp
createConstFromIntAttribute(Operation *op, std::string attrName,		createConstFromIntAttribute(Operation *op, std::string attrName,
Type requiredAttrType, PatternRewriter &rewriter) {		Type requiredAttrType, OpBuilder &rewriter) {
auto castedN = static_cast<T>(		auto castedN = static_cast<T>(
op->getAttr(attrName).cast<IntegerAttr>().getValue().getSExtValue());		op->getAttr(attrName).cast<IntegerAttr>().getValue().getSExtValue());
return rewriter.create<mlir::ConstantOp>(		return rewriter.create<mlir::ConstantOp>(
op->getLoc(), IntegerAttr::get(requiredAttrType, castedN));		op->getLoc(), IntegerAttr::get(requiredAttrType, castedN));
}		}

		/// Appends each element of `attr` to `arrayValues`. Every element is cast
		/// to an IntegerAttr and read as a sign-extended integer before being
		/// converted to `T` on insertion.
		template <typename T>
		static void getValuesFromIntArrayAttribute(ArrayAttr attr,
		                                           SmallVector<T> &arrayValues) {
		  for (Attribute element : attr.getValue()) {
		    auto signExtended = element.cast<IntegerAttr>().getValue().getSExtValue();
		    arrayValues.push_back(signExtended);
		  }
		}

		// Generates an affine map for parallel operations on a given type. This
		silvasUnsubmitted Done Reply Inline Actions Doc comment. silvas: Doc comment.
		// performs implicit broadcasting across any dimension of size-1.
		static AffineMap createAffineMapForType(ShapedType type,
		                                        PatternRewriter &rewriter) {
		  unsigned rank = type.getRank();
		  auto shape = type.getShape();
		  SmallVector<AffineExpr, 4> dimExprs;
		  dimExprs.reserve(rank);
		  for (unsigned i = 0; i < rank; ++i) {
		    // If the dimension is one we can broadcast the input with a constant
		    // affine expression; otherwise the loop index is used directly.
		    if (shape[i] == 1)
		      dimExprs.push_back(rewriter.getAffineConstantExpr(0));
		    else
		      dimExprs.push_back(rewriter.getAffineDimExpr(i));
		  }
		  // One map dimension per tensor dimension, and no symbols.
		  return AffineMap::get(/*dimCount=*/rank, /*symbolCount=*/0, dimExprs,
		                        rewriter.getContext());
		}

template <typename T, typename P>		template <typename T, typename P>
static mlir::SelectOp clampHelper(Operation *op, ValueRange args,		static mlir::SelectOp clampHelper(Location loc, Value arg, mlir::ConstantOp min,
mlir::ConstantOp min, mlir::ConstantOp max,		mlir::ConstantOp max, P pred,
P pred, PatternRewriter &rewriter) {		OpBuilder &rewriter) {
Location loc = op->getLoc();		auto smallerThanMin = rewriter.create<T>(loc, pred, arg, min);
auto smallerThanMin = rewriter.create<T>(loc, pred, args[0], min);
auto minOrArg =		auto minOrArg =
rewriter.create<mlir::SelectOp>(loc, smallerThanMin, min, args[0]);		rewriter.create<mlir::SelectOp>(loc, smallerThanMin, min, arg);
auto largerThanMax = rewriter.create<T>(loc, pred, max, args[0]);		auto largerThanMax = rewriter.create<T>(loc, pred, max, arg);
return rewriter.create<mlir::SelectOp>(loc, largerThanMax, max, minOrArg);		return rewriter.create<mlir::SelectOp>(loc, largerThanMax, max, minOrArg);
}		}

static Value		static Value
createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,		createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,
ArrayRef<Type> resultTypes,		ArrayRef<Type> resultTypes,
PatternRewriter &rewriter) {		PatternRewriter &rewriter) {
Location loc = op->getLoc();		Location loc = op->getLoc();
▲ Show 20 Lines • Show All 147 Lines • ▼ Show 20 Lines	if (isa<tosa::FloorOp>(op) && elementTy.isa<FloatType>())
return rewriter.create<mlir::FloorFOp>(loc, resultTypes, args);		return rewriter.create<mlir::FloorFOp>(loc, resultTypes, args);

// tosa::ClampOp		// tosa::ClampOp
if (isa<tosa::ClampOp>(op) && elementTy.isa<FloatType>()) {		if (isa<tosa::ClampOp>(op) && elementTy.isa<FloatType>()) {
auto min = rewriter.create<mlir::ConstantOp>(loc, elementTy,		auto min = rewriter.create<mlir::ConstantOp>(loc, elementTy,
op->getAttr("min_fp"));		op->getAttr("min_fp"));
auto max = rewriter.create<mlir::ConstantOp>(loc, elementTy,		auto max = rewriter.create<mlir::ConstantOp>(loc, elementTy,
op->getAttr("max_fp"));		op->getAttr("max_fp"));
return clampHelper<mlir::CmpFOp>(op, args, min, max, CmpFPredicate::OLT,		return clampHelper<mlir::CmpFOp>(loc, args[0], min, max, CmpFPredicate::OLT,
rewriter);		rewriter);
}		}

if (isa<tosa::ClampOp>(op) && elementTy.isa<IntegerType>()) {		if (isa<tosa::ClampOp>(op) && elementTy.isa<IntegerType>()) {
auto min = createConstFromIntAttribute<int32_t>(op, "min_int", elementTy,		auto min = createConstFromIntAttribute<int32_t>(op, "min_int", elementTy,
rewriter);		rewriter);
auto max = createConstFromIntAttribute<int32_t>(op, "max_int", elementTy,		auto max = createConstFromIntAttribute<int32_t>(op, "max_int", elementTy,
rewriter);		rewriter);
return clampHelper<mlir::CmpIOp>(op, args, min, max, CmpIPredicate::slt,		return clampHelper<mlir::CmpIOp>(loc, args[0], min, max, CmpIPredicate::slt,
rewriter);		rewriter);
}		}

// tosa::ReluNOp		// tosa::ReluNOp
if (isa<tosa::ReluNOp>(op) && elementTy.isa<FloatType>()) {		if (isa<tosa::ReluNOp>(op) && elementTy.isa<FloatType>()) {
auto zero =		auto zero =
rewriter.create<mlir::ConstantOp>(loc, FloatAttr::get(elementTy, 0));		rewriter.create<mlir::ConstantOp>(loc, FloatAttr::get(elementTy, 0));
auto n = rewriter.create<mlir::ConstantOp>(loc, elementTy,		auto n = rewriter.create<mlir::ConstantOp>(loc, elementTy,
op->getAttr("max_fp"));		op->getAttr("max_fp"));
return clampHelper<mlir::CmpFOp>(op, args, zero, n, CmpFPredicate::OLT,		return clampHelper<mlir::CmpFOp>(loc, args[0], zero, n, CmpFPredicate::OLT,
rewriter);		rewriter);
}		}

if (isa<tosa::ReluNOp>(op) && elementTy.isa<IntegerType>()) {		if (isa<tosa::ReluNOp>(op) && elementTy.isa<IntegerType>()) {
auto zero =		auto zero =
rewriter.create<mlir::ConstantOp>(loc, IntegerAttr::get(elementTy, 0));		rewriter.create<mlir::ConstantOp>(loc, IntegerAttr::get(elementTy, 0));
auto n = createConstFromIntAttribute<int32_t>(op, "max_int", elementTy,		auto n = createConstFromIntAttribute<int32_t>(op, "max_int", elementTy,
rewriter);		rewriter);
return clampHelper<mlir::CmpIOp>(op, args, zero, n, CmpIPredicate::slt,		return clampHelper<mlir::CmpIOp>(loc, args[0], zero, n, CmpIPredicate::slt,
rewriter);		rewriter);
}		}

(void)rewriter.notifyMatchFailure(		(void)rewriter.notifyMatchFailure(
op, "unhandled op for linalg body calculation for elementwise op");		op, "unhandled op for linalg body calculation for elementwise op");
return nullptr;		return nullptr;
}		}

Show All 34 Lines	elementwiseMatchAndRewriteHelper(Operation *operation,
auto bodyResultTypes = llvm::to_vector<4>(llvm::map_range(		auto bodyResultTypes = llvm::to_vector<4>(llvm::map_range(
initTensors, [](Value v) { return getElementTypeOrSelf(v); }));		initTensors, [](Value v) { return getElementTypeOrSelf(v); }));

unsigned nloops = t0.getRank();		unsigned nloops = t0.getRank();
SmallVector<AffineMap, 2> indexingMaps;		SmallVector<AffineMap, 2> indexingMaps;
indexingMaps.reserve(operation->getNumOperands() + bodyResultTypes.size());		indexingMaps.reserve(operation->getNumOperands() + bodyResultTypes.size());

// Input indexing maps may be broadcasted.		// Input indexing maps may be broadcasted.
for (Type types : operation->getOperandTypes()) {		for (Type type : operation->getOperandTypes()) {
auto shape = types.cast<ShapedType>().getShape();		indexingMaps.push_back(
SmallVector<AffineExpr, 4> dimExprs;		createAffineMapForType(type.cast<ShapedType>(), rewriter));
dimExprs.reserve(nloops);
for (unsigned i = 0; i < nloops; ++i) {
// If the dimension is one we can broadcast the input with a constant
// affine expression.
if (shape[i] == 1)
dimExprs.push_back(rewriter.getAffineConstantExpr(0));
else
dimExprs.push_back(rewriter.getAffineDimExpr(i));
}
indexingMaps.push_back(AffineMap::get(/dimCount=/nloops,
/symbolCount=/0, dimExprs,
rewriter.getContext()));
}		}

indexingMaps.append(operation->getNumResults(),		indexingMaps.append(operation->getNumResults(),
rewriter.getMultiDimIdentityMap(nloops));		rewriter.getMultiDimIdentityMap(nloops));

bool didEncounterError = false;		bool didEncounterError = false;
auto linalgOp = rewriter.create<linalg::GenericOp>(		auto linalgOp = rewriter.create<linalg::GenericOp>(
loc, opResultTypes, operation->getOperands(), initTensors, indexingMaps,		loc, opResultTypes, operation->getOperands(), initTensors, indexingMaps,
▲ Show 20 Lines • Show All 311 Lines • ▼ Show 20 Lines	rewriter.replaceOpWithNewOp<linalg::GenericOp>(
getNParallelLoopsAttrs(resultTy.getRank()),		getNParallelLoopsAttrs(resultTy.getRank()),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {		[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) {
nestedBuilder.create<linalg::YieldOp>(op.getLoc(), *args.begin());		nestedBuilder.create<linalg::YieldOp>(op.getLoc(), *args.begin());
});		});
return success();		return success();
}		}
};		};

		class RescaleOpConverter : public OpRewritePattern<tosa::RescaleOp> {
		silvasUnsubmitted Done Reply Inline Actions with apply_scale, this should just be another elementwise conversion like any other. I don't understand why there is so much code involvedhere. (i.e. based on my a priori understanding, it seems like this patch should only change createLinalgBodyCalculationForElementwiseOp) silvas: with apply_scale, this should just be another elementwise conversion like any other. I don't…
		rsudermanAuthorUnsubmitted Done Reply Inline Actions There is quite a bit more required for Rescale. Values are not passed as tensors but as attributes and need to be serialized to constants. Furthermore, the multiply/shift values need to be conditionally broadcasted (depending whether we are rescaling per channel). rsuderman: There is quite a bit more required for Rescale. Values are not passed as tensors but as…
		public:
		using OpRewritePattern<tosa::RescaleOp>::OpRewritePattern;

		LogicalResult matchAndRewrite(tosa::RescaleOp op,
		PatternRewriter &rewriter) const final {
		auto loc = op.getLoc();
		auto input = op.input();
		auto inputTy = op.input().getType().cast<ShapedType>();
		auto outputTy = op.output().getType().cast<ShapedType>();
		unsigned rank = inputTy.getRank();

		if (!outputTy.hasStaticShape())
		return rewriter.notifyMatchFailure(
		op, "tosa to linalg conversion expects statically shaped tensors");

		// The shift and multiplier values.
		SmallVector<int32_t> multiplierValues;
		getValuesFromIntArrayAttribute(op.multiplier(), multiplierValues);

		SmallVector<int8_t> shiftValues;
		getValuesFromIntArrayAttribute(op.shift(), shiftValues);

		// Double rounding only applies when the shift is greater than 31, so check
		// whether any shift value actually exceeds it.
		bool doubleRound =
		op.double_round() &&
		llvm::any_of(shiftValues, [](int32_t v) { return v > 31; });

		// We need to broadcast along the last dimension, so make all dims 1.
		SmallVector<int64_t> multiplierShape;
		multiplierShape.resize(rank, 1);

		SmallVector<int64_t> shiftShape;
		shiftShape.resize(rank, 1);

		silvasUnsubmitted Done Reply Inline Actions does scale32 need to be handled? silvas: does scale32 need to be handled?
		rsudermanAuthorUnsubmitted Done Reply Inline Actions The Scale32 boolean is fairly superfluous. If Scale32 is false it means the input is an i48, which we don't support yet. It is debateable whether we support i48 types in the future (and will likely require a fairly different codegen with only i64 simulation). rsuderman: The Scale32 boolean is fairly superfluous. If Scale32 is false it means the input is an i48…
		// Set the channel dimension to match the number of shift/multiplier
		// channels.
		if (!multiplierShape.empty())
		multiplierShape.back() = multiplierValues.size();
		if (!shiftShape.empty())
		shiftShape.back() = shiftValues.size();

		// Create the tensor types.
		auto multiplierType =
		RankedTensorType::get(multiplierShape, rewriter.getI32Type());
		auto shiftType =
		RankedTensorType::get(shiftShape, rewriter.getIntegerType(8));

		auto multiplierConst = rewriter.create<ConstantOp>(
		silvasUnsubmitted Done Reply Inline Actions ah yes, now I see why this is not just a trivial elementwise op. silvas: ah yes, now I see why this is not just a trivial elementwise op.
		loc, DenseIntElementsAttr::get(multiplierType, multiplierValues));

		auto shiftConst = rewriter.create<ConstantOp>(
		loc, DenseIntElementsAttr::get(shiftType, shiftValues));

		// Construct the indexing maps needed for linalg.generic ops.
		SmallVector<Type> bodyArgTypes = {getElementTypeOrSelf(inputTy),
		rewriter.getI32Type(),
		rewriter.getI32Type()};
		Value initTensor = rewriter.create<linalg::InitTensorOp>(
		loc, ArrayRef<Value>({}), outputTy.getShape(),
		outputTy.getElementType());

		SmallVector<AffineMap, 4> indexingMaps;

		// Indexing map for input values.
		indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank));

		// Shift and multiplier will need to broadcast across their non channel
		// values.
		indexingMaps.push_back(createAffineMapForType(multiplierType, rewriter));
		indexingMaps.push_back(createAffineMapForType(shiftType, rewriter));

		// Indexing maps for output values.
		indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank));

		auto linalgOp = rewriter.create<linalg::GenericOp>(
		loc, outputTy, ValueRange{input, multiplierConst, shiftConst},
		ValueRange{initTensor}, indexingMaps, getNParallelLoopsAttrs(rank),
		[&](OpBuilder &nestedBuilder, Location nestedLoc,
		ValueRange blockArgs) {
		// Zero-point handling and saturation are done in 32-bit here; the wide
		// multiply/shift is left to tosa.apply_scale. This is not optimal but
		// should be correct for now, consider computing correct bit depth later.
		auto inputZp = createConstFromIntAttribute<int32_t>(
		op, "input_zp", nestedBuilder.getI32Type(), nestedBuilder);
		auto outputZp = createConstFromIntAttribute<int32_t>(
		op, "output_zp", nestedBuilder.getI32Type(), nestedBuilder);

		Value value = blockArgs[0];
		Value multiplier = blockArgs[1];
		Value shift = blockArgs[2];

		if (value.getType().getIntOrFloatBitWidth() < 32) {
		value = nestedBuilder.create<SignExtendIOp>(
		nestedLoc, nestedBuilder.getI32Type(), value);
		}

		value = nestedBuilder.create<SubIOp>(nestedLoc, value, inputZp);

		value = nestedBuilder.create<tosa::ApplyScaleOp>(
		loc, nestedBuilder.getI32Type(), value, multiplier, shift,
		nestedBuilder.getBoolAttr(doubleRound));

		// Move to the new zero-point.
		value = nestedBuilder.create<AddIOp>(nestedLoc, value, outputZp);

		// Saturate to the output size.
		IntegerType outIntType =
		blockArgs.back().getType().cast<IntegerType>();
		unsigned outBitWidth = outIntType.getWidth();
		auto intMin = nestedBuilder.create<ConstantOp>(
		loc, nestedBuilder.getIntegerAttr(
		nestedBuilder.getI32Type(),
		APInt::getSignedMinValue(outBitWidth).getSExtValue()));
		auto intMax = nestedBuilder.create<ConstantOp>(
		loc, nestedBuilder.getIntegerAttr(
		nestedBuilder.getI32Type(),
		APInt::getSignedMaxValue(outBitWidth).getSExtValue()));

		value = clampHelper<mlir::CmpIOp>(nestedLoc, value, intMin, intMax,
		CmpIPredicate::slt, nestedBuilder);

		if (outIntType.getWidth() < 32) {
		value =
		nestedBuilder.create<TruncateIOp>(nestedLoc, outIntType, value);
		}

		nestedBuilder.create<linalg::YieldOp>(loc, value);
		});

		rewriter.replaceOp(op, linalgOp->getResults());
		return success();
		}
		};

// At the codegen level any identity operations should be removed. Any cases		// At the codegen level any identity operations should be removed. Any cases
// where identity is load-bearing (e.g. cross device computation) should be		// where identity is load-bearing (e.g. cross device computation) should be
// handled before lowering to codegen.		// handled before lowering to codegen.
template <typename SrcOp>		template <typename SrcOp>
class IdentityNConverter : public OpRewritePattern<SrcOp> {		class IdentityNConverter : public OpRewritePattern<SrcOp> {
public:		public:
using OpRewritePattern<SrcOp>::OpRewritePattern;		using OpRewritePattern<SrcOp>::OpRewritePattern;

▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	patterns->insert<
PointwiseConverter<tosa::GreaterEqualOp>,		PointwiseConverter<tosa::GreaterEqualOp>,
PointwiseConverter<tosa::MaximumOp>, PointwiseConverter<tosa::MinimumOp>,		PointwiseConverter<tosa::MaximumOp>, PointwiseConverter<tosa::MinimumOp>,
PointwiseConverter<tosa::CeilOp>, PointwiseConverter<tosa::FloorOp>,		PointwiseConverter<tosa::CeilOp>, PointwiseConverter<tosa::FloorOp>,
PointwiseConverter<tosa::ClampOp>, PointwiseConverter<tosa::ReluNOp>,		PointwiseConverter<tosa::ClampOp>, PointwiseConverter<tosa::ReluNOp>,
IdentityNConverter<tosa::IdentityOp>,		IdentityNConverter<tosa::IdentityOp>,
IdentityNConverter<tosa::IdentityNOp>, ReduceConverter<tosa::ReduceMinOp>,		IdentityNConverter<tosa::IdentityNOp>, ReduceConverter<tosa::ReduceMinOp>,
ReduceConverter<tosa::ReduceMaxOp>, ReduceConverter<tosa::ReduceSumOp>,		ReduceConverter<tosa::ReduceMaxOp>, ReduceConverter<tosa::ReduceSumOp>,
ReduceConverter<tosa::ReduceProdOp>, ConcatOpConversion,		ReduceConverter<tosa::ReduceProdOp>, ConcatOpConversion,
ReshapeOpConverter, TransposeConverter>(context);		ReshapeOpConverter, TransposeConverter, RescaleOpConverter>(context);
}		}

mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp

Show All 36 Lines	public:
}		}

void runOnFunction() override {		void runOnFunction() override {
OwningRewritePatternList patterns;		OwningRewritePatternList patterns;
ConversionTarget target(getContext());		ConversionTarget target(getContext());
target.addLegalDialect<linalg::LinalgDialect, memref::MemRefDialect,		target.addLegalDialect<linalg::LinalgDialect, memref::MemRefDialect,
StandardOpsDialect>();		StandardOpsDialect>();
target.addIllegalDialect<tosa::TosaDialect>();		target.addIllegalDialect<tosa::TosaDialect>();

		// Not every TOSA op can be legalized to linalg.
		target.addLegalOp<tosa::ApplyScaleOp>();
		target.addLegalOp<tosa::IfOp>();
		target.addLegalOp<tosa::ConstOp>();
		target.addLegalOp<tosa::WhileOp>();

target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });		target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });

FuncOp func = getFunction();		FuncOp func = getFunction();
mlir::tosa::populateTosaToLinalgOnTensorsConversionPatterns(		mlir::tosa::populateTosaToLinalgOnTensorsConversionPatterns(
func.getContext(), &patterns);		func.getContext(), &patterns);
if (failed(applyFullConversion(func, target, std::move(patterns))))		if (failed(applyFullConversion(func, target, std::move(patterns))))
signalPassFailure();		signalPassFailure();
}		}
Show All 11 Lines

mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp

Show All 40 Lines	LogicalResult matchAndRewrite(tosa::SliceOp sliceOp,
Value input = sliceOp.input();		Value input = sliceOp.input();
SmallVector<int64_t> strides;		SmallVector<int64_t> strides;
strides.resize(sliceOp.getType().template cast<ShapedType>().getRank(), 1);		strides.resize(sliceOp.getType().template cast<ShapedType>().getRank(), 1);

rewriter.replaceOpWithNewOp<SubTensorOp>(		rewriter.replaceOpWithNewOp<SubTensorOp>(
sliceOp, sliceOp.getType(), input, ValueRange({}), ValueRange({}),		sliceOp, sliceOp.getType(), input, ValueRange({}), ValueRange({}),
ValueRange({}), sliceOp.start(), sliceOp.size(),		ValueRange({}), sliceOp.start(), sliceOp.size(),
rewriter.getI64ArrayAttr(strides));		rewriter.getI64ArrayAttr(strides));
		return success();
		}
		};

		// This converts the TOSA ApplyScale operator to a set of StandardOps ops,
		// using 64-bit operations to perform the necessary multiply, round, and shift.
		// Narrower types are used wherever the full 64-bit width is not required.
		class ApplyScaleOpConverter : public OpRewritePattern<tosa::ApplyScaleOp> {
		public:
		using OpRewritePattern<tosa::ApplyScaleOp>::OpRewritePattern;

		LogicalResult matchAndRewrite(tosa::ApplyScaleOp op,
		PatternRewriter &rewriter) const final {
		Location loc = op.getLoc();
		Value value32 = op.value();
		Value multiplier32 = op.multiplier();
		Value shift8 = op.shift();
		bool doubleRound = op.double_round();

		Value one8 = rewriter.create<ConstantOp>(
		loc, rewriter.getIntegerAttr(rewriter.getIntegerType(8), 1));
		Value one32 = rewriter.create<ConstantOp>(
		loc, rewriter.getIntegerAttr(rewriter.getI32Type(), 1));
		Value one64 = rewriter.create<ConstantOp>(
		loc, rewriter.getIntegerAttr(rewriter.getI64Type(), 1));

		Value shiftSubOne8 = rewriter.create<SubIOp>(loc, shift8, one8);

		// The rounding value semantics below equate to the following code:
		// int64_t round = 1 << (shift - 1);
		// if (double_round) {
		// if (shift > 31 && value >= 0) round += 1<<30;
		// if (shift > 31 && value < 0) round -= 1<<30;
		// }
		//
		// Note that minimal bitwidth operators are used throughout the block.

		Value shift32 = rewriter.create<mlir::SignExtendIOp>(
		loc, rewriter.getI32Type(), shift8);

		Value round64 = rewriter.create<mlir::ShiftLeftOp>(
		loc, one64,
		rewriter.create<SignExtendIOp>(loc, rewriter.getI64Type(),
		shiftSubOne8));

		// Double rounding is performing a round operation before the shift
		if (doubleRound) {
		Value zero32 = rewriter.create<ConstantOp>(
		loc, rewriter.getZeroAttr(rewriter.getI32Type()));
		Value thirty32 = rewriter.create<ConstantOp>(
		loc, rewriter.getIntegerAttr(rewriter.getI32Type(), 30));

		Value shiftThirty32 =
		rewriter.create<mlir::ShiftLeftOp>(loc, one32, thirty32);
		Value shiftThirty64 = rewriter.create<mlir::SignExtendIOp>(
		loc, rewriter.getI64Type(), shiftThirty32);

		// The 1<<30 adjustment is added to the round value for non-negative inputs
		// and subtracted from it for negative inputs.
		Value roundAdd64 =
		rewriter.create<mlir::AddIOp>(loc, round64, shiftThirty64);
		Value roundSub64 =
		rewriter.create<mlir::SubIOp>(loc, round64, shiftThirty64);

		Value valueGreaterThanZero = rewriter.create<mlir::CmpIOp>(
		loc, CmpIPredicate::sge, value32, zero32);

		Value doubleRound64 = rewriter.create<mlir::SelectOp>(
		loc, valueGreaterThanZero, roundAdd64, roundSub64);

		// We only perform double rounding if the shift value is greater than 31.
		Value thirtyTwo32 = rewriter.create<ConstantOp>(
		loc, rewriter.getIntegerAttr(rewriter.getI32Type(), 32));
		Value shiftGreaterThanThirtyTwo = rewriter.create<mlir::CmpIOp>(
		loc, CmpIPredicate::sge, shift32, thirtyTwo32);
		round64 = rewriter.create<mlir::SelectOp>(loc, shiftGreaterThanThirtyTwo,
		doubleRound64, round64);
		}

		// The computation below equates to the following pseudocode:
		// int64_t result = (int64_t)value * multiplier + round;
		// result = result >> shift;
		//
		// Note that multiply and shift need to be performed in i64 to preserve bits.

		Value value64 =
		rewriter.create<SignExtendIOp>(loc, rewriter.getI64Type(), value32);
		Value multiplier64 = rewriter.create<SignExtendIOp>(
		loc, rewriter.getI64Type(), multiplier32);
		Value shift64 =
		rewriter.create<SignExtendIOp>(loc, rewriter.getI64Type(), shift8);

		// Multiply as a pair of i64 values to guarantee the end value fits.
		Value result64 = rewriter.create<MulIOp>(loc, value64, multiplier64);
		result64 = rewriter.create<AddIOp>(loc, result64, round64);
		result64 =
		rewriter.create<mlir::SignedShiftRightOp>(loc, result64, shift64);

		Value result32 = rewriter.create<mlir::TruncateIOp>(
		loc, rewriter.getI32Type(), result64);

		rewriter.replaceOp(op, result32);
return success();		return success();
}		}
};		};

} // namespace		} // namespace

void mlir::tosa::populateTosaToStandardConversionPatterns(		void mlir::tosa::populateTosaToStandardConversionPatterns(
MLIRContext context, OwningRewritePatternList patterns) {		MLIRContext context, OwningRewritePatternList patterns) {
patterns->insert<ConstOpConverter, SliceOpConverter>(context);		patterns->insert<ApplyScaleOpConverter, ConstOpConverter, SliceOpConverter>(
		context);
		}

		void mlir::tosa::populateTosaRescaleToStandardConversionPatterns(
		MLIRContext context, OwningRewritePatternList patterns) {
		patterns->insert<ApplyScaleOpConverter>(context);
}		}

mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp

	Show All 27 Lines
	namespace {			namespace {
	struct TosaToStandard : public TosaToStandardBase<TosaToStandard> {			struct TosaToStandard : public TosaToStandardBase<TosaToStandard> {
	public:			public:
	void runOnOperation() override {			void runOnOperation() override {
	OwningRewritePatternList patterns;			OwningRewritePatternList patterns;
	ConversionTarget target(getContext());			ConversionTarget target(getContext());
	target.addIllegalOp<tosa::ConstOp>();			target.addIllegalOp<tosa::ConstOp>();
	target.addIllegalOp<tosa::SliceOp>();			target.addIllegalOp<tosa::SliceOp>();
				target.addIllegalOp<tosa::ApplyScaleOp>();
	target.addLegalDialect<StandardOpsDialect>();			target.addLegalDialect<StandardOpsDialect>();

	auto *op = getOperation();			auto *op = getOperation();
	mlir::tosa::populateTosaToStandardConversionPatterns(op->getContext(),			mlir::tosa::populateTosaToStandardConversionPatterns(op->getContext(),
	&patterns);			&patterns);
	if (failed(applyPartialConversion(op, target, std::move(patterns))))			if (failed(applyPartialConversion(op, target, std::move(patterns))))
	signalPassFailure();			signalPassFailure();
	}			}
	Show All 10 Lines

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir

Show First 20 Lines • Show All 445 Lines • ▼ Show 20 Lines	func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
// CHECK: [[IDX1:%.+]] = constant 1 : index		// CHECK: [[IDX1:%.+]] = constant 1 : index
// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[IDX1]]		// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[IDX1]]
// CHECK: [[ARG1_AXIS:%.+]] = memref.dim %arg1, [[AXIS]]		// CHECK: [[ARG1_AXIS:%.+]] = memref.dim %arg1, [[AXIS]]
// CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM0]], [[ARG1_AXIS]]		// CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM0]], [[ARG1_AXIS]]
// CHECK: [[INIT:%.+]] = linalg.init_tensor [11, 1]		// CHECK: [[INIT:%.+]] = linalg.init_tensor [11, 1]
// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[AXIS]]		// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[INIT]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]		// CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[INIT]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]]		// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]]
// CHECK: [[ARG1_DIM0:%.+]] = memref.dim %arg1, [[AXIS]]		// CHECK: [[ARG1_DIM0:%.+]] = memref.dim %arg1, [[AXIS]]
		silvas (inline comment; Unsubmitted, Done): It feels like a better definition of tosa.apply_scale would be one that makes this entire body reduce to a single tosa.apply_scale op (perhaps with a constant), i.e. include all the details in there.
		rsuderman (Author; inline comment; Unsubmitted, Done): Discussed above, but the additional code is unique to rescale, while ApplyScale is a replicated pattern in the TOSA specification. If we want a reduction, a formal clamp operator could be useful (as that is replicated in a number of operations). But that feels like a standard op improvement.
// CHECK: [[INSERT1:%.+]] = subtensor_insert %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]		// CHECK: [[INSERT1:%.+]] = subtensor_insert %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
%0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>) -> (tensor<11x1xf32>)		%0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>) -> (tensor<11x1xf32>)

// CHECK: [[AXIS:%.+]] = constant 1		// CHECK: [[AXIS:%.+]] = constant 1
// CHECK: [[STRIDE:%.+]] = constant 1		// CHECK: [[STRIDE:%.+]] = constant 1
// CHECK: [[OFFSET:%.+]] = constant 0 : index		// CHECK: [[OFFSET:%.+]] = constant 0 : index
// CHECK: [[IDX0:%.+]] = constant 0 : index		// CHECK: [[IDX0:%.+]] = constant 0 : index
// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[IDX0]]		// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[IDX0]]
// CHECK: [[IDX1:%.+]] = constant 1 : index		// CHECK: [[IDX1:%.+]] = constant 1 : index
// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[IDX1]]		// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[IDX1]]
// CHECK: [[ARG1_AXIS:%.+]] = memref.dim %arg0, [[AXIS]]		// CHECK: [[ARG1_AXIS:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM1]], [[ARG1_AXIS]]		// CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM1]], [[ARG1_AXIS]]
// CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2]		// CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2]
// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]		// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[INIT]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]		// CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[INIT]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]]		// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]]
// CHECK: [[ARG1_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]		// CHECK: [[ARG1_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[INSERT1:%.+]] = subtensor_insert %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]		// CHECK: [[INSERT1:%.+]] = subtensor_insert %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
%1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>) -> (tensor<5x2xf32>)		%1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>) -> (tensor<5x2xf32>)
return		return
}		}

		// -----

		// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
		// CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (0)>

		// CHECK-LABEL: @rescale
		func @rescale(%arg0 : tensor<1xi8>) -> (tensor<1xi8>) {
		// CHECK: [[C0:%.+]] = constant dense<19689>
		// CHECK: [[C1:%.+]] = constant dense<15>
		// CHECK: [[INIT:%.+]] = linalg.init_tensor [1]
		// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[C0]], [[C1]] : tensor<1xi8>, tensor<1xi32>, tensor<1xi8>) outs([[INIT]] : tensor<1xi8>)
		// CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8):
		// CHECK: [[C243:%.+]] = constant 243
		// CHECK: [[C252:%.+]] = constant 252

		// CHECK-DAG: [[IN32:%.+]] = sexti [[IN]]
		// CHECK-DAG: [[IN_ZEROED:%.+]] = subi [[IN32]], [[C243]]
		// CHECK-DAG: [[SCALED:%.+]] = "tosa.apply_scale"([[IN_ZEROED]], [[MULTIPLIER]], [[SHIFT]]) {double_round = false}
		// CHECK-DAG: [[SCALED_ZEROED:%.+]] = addi [[SCALED]], [[C252]]
		// CHECK-DAG: [[CMIN:%.+]] = constant -128
		// CHECK-DAG: [[CMAX:%.+]] = constant 127
		// CHECK-DAG: [[MINLT:%.+]] = cmpi slt, [[SCALED_ZEROED]], [[CMIN]]
		// CHECK-DAG: [[MAXLT:%.+]] = cmpi slt, [[CMAX]], [[SCALED_ZEROED]]
		// CHECK-DAG: [[LOWER:%.+]] = select [[MINLT]], [[CMIN]], [[SCALED_ZEROED]]
		// CHECK-DAG: [[BOUNDED:%.+]] = select [[MAXLT]], [[CMAX]], [[LOWER]]
		// CHECK-DAG: [[TRUNC:%.+]] = trunci [[BOUNDED]]
		// CHECK-DAG: linalg.yield [[TRUNC]]
		%0 = "tosa.rescale"(%arg0) {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<1xi8>) -> (tensor<1xi8>)

		// CHECK: return [[GENERIC]]
		return %0 : tensor<1xi8>
		}

		// CHECK-LABEL: @rescaleDoubleRound
		func @rescaleDoubleRound(%arg0 : tensor<1xi8>) -> (tensor<1xi8>) {
		// CHECK: linalg.generic
		// CHECK: "tosa.apply_scale"
		// CHECK-SAME: {double_round = true}
		%0 = "tosa.rescale"(%arg0) {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = [19689 : i32], shift = [33 : i32], scale32 = true, double_round = true, per_channel = false} : (tensor<1xi8>) -> (tensor<1xi8>)
		return %0 : tensor<1xi8>
		}

		// CHECK-LABEL: @rescaleUnnecessaryDoubleRound
		func @rescaleUnnecessaryDoubleRound(%arg0 : tensor<1xi8>) -> (tensor<1xi8>) {
		// CHECK: linalg.generic
		// CHECK: "tosa.apply_scale"
		// CHECK-SAME: {double_round = false}
		%0 = "tosa.rescale"(%arg0) {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = true, double_round = true, per_channel = false} : (tensor<1xi8>) -> (tensor<1xi8>)
		return %0 : tensor<1xi8>
		}

mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir

	// RUN: mlir-opt --split-input-file --tosa-to-standard %s -verify-diagnostics -o -| FileCheck %s			// RUN: mlir-opt --split-input-file --tosa-to-standard %s -verify-diagnostics -o -| FileCheck %s

	// CHECK-LABEL: func @const_test			// CHECK-LABEL: func @const_test
	func @const_test() -> (tensor<i32>) {			func @const_test() -> (tensor<i32>) {
	// CHECK: [[C3:%.+]] = constant dense<3> : tensor<i32>			// CHECK: [[C3:%.+]] = constant dense<3> : tensor<i32>
	%0 = "tosa.const"() {value = dense<3> : tensor<i32>} : () -> tensor<i32>			%0 = "tosa.const"() {value = dense<3> : tensor<i32>} : () -> tensor<i32>

	// CHECK: return [[C3]]			// CHECK: return [[C3]]
	return %0 : tensor<i32>			return %0 : tensor<i32>
	}			}

	// ----			// -----

	func @slice(%arg0: tensor<6xf32>) ->() {			func @slice(%arg0: tensor<6xf32>) ->() {
	// CHECK: [[SLICE:%.+]] = subtensor %arg0[2] [1] [1]			// CHECK: [[SLICE:%.+]] = subtensor %arg0[2] [1] [1]
	%0 = "tosa.slice"(%arg0) {start = [2], size = [1]} : (tensor<6xf32>) -> (tensor<1xf32>)			%0 = "tosa.slice"(%arg0) {start = [2], size = [1]} : (tensor<6xf32>) -> (tensor<1xf32>)
	return			return
	}			}

				// -----

				func @apply_scale_test(%arg0 : i32, %arg1 : i32, %arg2 : i8) -> (i32) {
				// CHECK: [[C1_8:%.+]] = constant 1 : i8
				// CHECK: [[C1_32:%.+]] = constant 1 : i32
				// CHECK: [[C1_64:%.+]] = constant 1 : i64
				// CHECK: [[SHIFT_MINUS_ONE_8:%.+]] = subi %arg2, [[C1_8]]

				// CHECK: [[SHIFT_32:%.+]] = sexti %arg2 : i8 to i32
				// CHECK: [[SHIFT_MINUS_ONE_64:%.+]] = sexti [[SHIFT_MINUS_ONE_8]] : i8 to i64
				// CHECK: [[SHIFTED_64:%.+]] = shift_left [[C1_64]], [[SHIFT_MINUS_ONE_64]]

				// CHECK: [[C0_32:%.+]] = constant 0 : i32
				// CHECK: [[C30_32:%.+]] = constant 30 : i32
				// CHECK: [[SECOND_BIAS:%.+]] = shift_left [[C1_32]], [[C30_32]]
				// CHECK: [[SECOND_BIAS_64:%.+]] = sexti [[SECOND_BIAS]] : i32 to i64
				// CHECK: [[POSITIVE_ROUND:%.+]] = addi [[SHIFTED_64]], [[SECOND_BIAS_64]]
				// CHECK: [[NEGATIVE_ROUND:%.+]] = subi [[SHIFTED_64]], [[SECOND_BIAS_64]]
				// CHECK: [[VALUE_NEGATIVE:%.+]] = cmpi sge, %arg0, [[C0_32]] : i32
				// CHECK: [[DOUBLE_ROUNDED:%.+]] = select [[VALUE_NEGATIVE]], [[POSITIVE_ROUND]], [[NEGATIVE_ROUND]] : i64
				// CHECK: [[C32_32:%.+]] = constant 32 : i32
				// CHECK: [[IS_32BIT_SHIFT:%.+]] = cmpi sge, [[SHIFT_32]], [[C32_32]]
				// CHECK: [[ROUND:%.+]] = select [[IS_32BIT_SHIFT]], [[DOUBLE_ROUNDED]], [[SHIFTED_64]]

				// CHECK: [[VAL_64:%.+]] = sexti %arg0 : i32 to i64
				// CHECK: [[MULTIPLY_64:%.+]] = sexti %arg1 : i32 to i64
				// CHECK: [[SHIFT_64:%.+]] = sexti %arg2 : i8 to i64
				// CHECK: [[SCALED:%.+]] = muli [[VAL_64]], [[MULTIPLY_64]]
				// CHECK: [[BIASED:%.+]] = addi [[SCALED]], [[ROUND]]
				// CHECK: [[DOWNSHIFTED:%.+]] = shift_right_signed [[BIASED]], [[SHIFT_64]]
				// CHECK: [[TRUNCATED:%.+]] = trunci [[DOWNSHIFTED]]

				%0 = "tosa.apply_scale"(%arg0, %arg1, %arg2) {double_round = true} : (i32, i32, i8) -> i32
				return %0 : i32
				}

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][tosa] Add lowering for tosa.rescale to linalg.generic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 331719

mlir/include/mlir/Conversion/TosaToStandard/TosaToStandard.h

mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td

mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td

mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp

mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp

mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp

mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir

mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][tosa] Add lowering for tosa.rescale to linalg.generic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 331719

mlir/include/mlir/Conversion/TosaToStandard/TosaToStandard.h

mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td

mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td

mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp

mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp

mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp

mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir

mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir

[mlir][tosa] Add lowering for tosa.rescale to linalg.generic
ClosedPublic