diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -221,6 +221,67 @@
 }
 
+//===----------------------------------------------------------------------===//
+// Reduce op.
+//===----------------------------------------------------------------------===//
+
+def TensorOrMemref :
+  AnyTypeOf<[AnyMemRef, AnyRankedTensor], "", "::mlir::ShapedType">;
+
+def ReduceOp : LinalgStructuredBase_Op<"reduce", [
+    SameVariadicOperandSize, SingleBlockImplicitTerminator<"YieldOp">
+  ]> {
+  let summary = "Reduce operator";
+  let description = [{
+    Executes `combiner` on the `dimensions` of `inputs` and returns the
+    reduced result. The `dimensions` attribute needs to list the reduction
+    dimensions in increasing order.
+
+    Example:
+    ```
+      %reduce = linalg.reduce
+          ins(%input:tensor<16x32x64xf32>)
+          outs(%init:tensor<16x64xf32>)
+          dimensions = [1]
+          (%in: f32, %out: f32) {
+            %0 = arith.addf %in, %out: f32
+            linalg.yield %0: f32
+          }
+    ```
+  }];
+
+  let arguments = (ins
+    // Input arg
+    Variadic<TensorOrMemref>:$inputs,
+    // Output arg
+    Variadic<TensorOrMemref>:$inits,
+
+    DenseI64ArrayAttr:$dimensions
+  );
+  let results = (outs Variadic<TensorOrMemref>);
+  let regions = (region SizedRegion<1>:$combiner);
+
+  let extraClassDeclaration = structuredOpsBaseDecls # [{
+    // Declare functions necessary for LinalgStructuredInterface.
+    ArrayAttr iterator_types();
+    ArrayAttr getIndexingMaps();
+
+    // Implement functions necessary for DestinationStyleOpInterface.
+    mlir::ValueRange outputs() { return getInits(); }
+    unsigned getNumInputs() { return getInputs().size(); };
+    unsigned getNumOutputs() { return getInits().size(); };
+    static std::function<void(ImplicitLocOpBuilder &, Block &,
+                              ArrayRef<NamedAttribute>)>
+    getRegionBuilder() {
+      return nullptr;
+    }
+  }];
+
+  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
+}
+
+
 //===----------------------------------------------------------------------===//
 // Named Linalg ops, implemented as a declarative configurations of generic ops.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -124,7 +124,8 @@
 static ParseResult
 parseCommonStructuredOpParts(OpAsmParser &parser, OperationState &result,
                              SmallVectorImpl<Type> &inputTypes,
-                             SmallVectorImpl<Type> &outputTypes) {
+                             SmallVectorImpl<Type> &outputTypes,
+                             bool addOperandSegmentSizes = true) {
   SMLoc inputsOperandsLoc, outputsOperandsLoc;
   SmallVector<OpAsmParser::UnresolvedOperand, 4> inputsOperands,
       outputsOperands;
@@ -155,10 +156,12 @@
                              result.operands))
     return failure();
 
-  result.addAttribute("operand_segment_sizes",
-                      parser.getBuilder().getDenseI32ArrayAttr(
-                          {static_cast<int32_t>(inputsOperands.size()),
-                           static_cast<int32_t>(outputsOperands.size())}));
+  if (addOperandSegmentSizes) {
+    result.addAttribute("operand_segment_sizes",
+                        parser.getBuilder().getDenseI32ArrayAttr(
+                            {static_cast<int32_t>(inputsOperands.size()),
+                             static_cast<int32_t>(outputsOperands.size())}));
+  }
   return success();
 }
 
@@ -1180,6 +1183,234 @@
   return foldMemRefCast(*this);
 }
 
+//===----------------------------------------------------------------------===//
+// ReduceOp
+//===----------------------------------------------------------------------===//
+
+ArrayAttr ReduceOp::iterator_types() {
+  int64_t inputRank = getInputs()[0].getType().cast<ShapedType>().getRank();
+  SmallVector<StringRef> iteratorTypes(inputRank,
+                                       getParallelIteratorTypeName());
+  for (int64_t reductionDim : getDimensions())
+    iteratorTypes[reductionDim] = getReductionIteratorTypeName();
+  return Builder(getContext()).getStrArrayAttr(iteratorTypes);
+}
+
+ArrayAttr ReduceOp::getIndexingMaps() {
+  SmallVector<AffineMap> affineMaps;
+  int64_t inputRank = getInputs()[0].getType().cast<ShapedType>().getRank();
+  for (int64_t i = 0, e = getNumInputs(); i < e; ++i) {
+    affineMaps.push_back(
+        AffineMap::getMultiDimIdentityMap(inputRank, getContext()));
+  }
+  SmallVector<AffineExpr> exprs;
+  ArrayRef<int64_t> dimensionsRef = getDimensions();
+  for (int64_t i = 0, j = 0; i < inputRank; ++i) {
+    bool isReductionDim = j < dimensionsRef.size() && dimensionsRef[j] == i;
+    if (isReductionDim) {
+      ++j;
+    } else {
+      exprs.push_back(getAffineDimExpr(i, getContext()));
+    }
+  }
+  for (int64_t i = 0, e = getNumOutputs(); i < e; ++i) {
+    affineMaps.push_back(
+        AffineMap::get(inputRank, /*symbolCount=*/0, exprs, getContext()));
+  }
+  return Builder(getContext()).getAffineMapArrayAttr(affineMaps);
+}
+
+void ReduceOp::getEffects(
+    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+        &effects) {
+  SmallVector<Value> inputBuffers = getInputBufferOperands();
+  SmallVector<Value> outputBuffers = getOutputBufferOperands();
+  getGenericEffectsImpl(effects, getOperation()->getResults(), inputBuffers,
+                        outputBuffers);
+}
+
+static ParseResult parseDstStyleOp(
+    OpAsmParser &parser, OperationState &result,
+    function_ref<ParseResult(OpAsmParser &, NamedAttrList &)> parseAttrsFn =
+        nullptr) {
+  // Parse `ins` and `outs`.
+  SmallVector<Type, 4> inputTypes, outputTypes;
+  if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes,
+                                   /*addOperandSegmentSizes=*/false))
+    return failure();
+
+  // Add result types.
+  for (Type outputType : outputTypes) {
+    if (outputType.isa<RankedTensorType>())
+      result.addTypes(outputType);
+  }
+
+  // Parse required attributes.
+  if (parseAttrsFn && failed(parseAttrsFn(parser, result.attributes)))
+    return failure();
+
+  // Parse optional attributes.
+  if (parser.parseOptionalAttrDict(result.attributes))
+    return failure();
+  return success();
+}
+
+static ParseResult parseDenseI64ArrayAttr(OpAsmParser &parser,
+                                          NamedAttrList &attributes,
+                                          StringRef attributeName) {
+  if (parser.parseKeyword(attributeName) || parser.parseEqual())
+    return failure();
+
+  attributes.set(attributeName, DenseI64ArrayAttr::parse(parser, Type{}));
+  return success();
+}
+
+ParseResult ReduceOp::parse(OpAsmParser &parser, OperationState &result) {
+  if (parseDstStyleOp(
+          parser, result, [&](OpAsmParser &parser, NamedAttrList &attributes) {
+            return parseDenseI64ArrayAttr(parser, attributes, "dimensions");
+          }))
+    return failure();
+
+  SmallVector<OpAsmParser::Argument> regionArgs;
+  if (parser.parseArgumentList(regionArgs, OpAsmParser::Delimiter::Paren,
+                               /*allowType=*/true, /*allowAttrs=*/true)) {
+    return failure();
+  }
+
+  Region *body = result.addRegion();
+  if (parser.parseRegion(*body, regionArgs))
+    return failure();
+
+  return success();
+}
+
+static void printDenseI64ArrayAttr(OpAsmPrinter &p, StringRef attributeName,
+                                   ArrayRef<int64_t> attributeValue) {
+  p << " " << attributeName << " = [" << attributeValue << "] ";
+}
+
+void ReduceOp::print(OpAsmPrinter &p) {
+  printCommonStructuredOpParts(p, getInputs(), outputs());
+  printDenseI64ArrayAttr(p, getDimensionsAttrName(), getDimensions());
+  p.printOptionalAttrDict((*this)->getAttrs(), {getDimensionsAttrName()});
+
+  p << "(";
+  llvm::interleaveComma(getCombiner().getArguments(), p,
+                        [&](auto arg) { p.printRegionArgument(arg); });
+  p << ") ";
+
+  p.printRegion(getCombiner(), /*printEntryBlockArgs=*/false);
+}
+
+static bool dimensionsMatch(int64_t d1, int64_t d2) {
+  return ShapedType::isDynamic(d1) || ShapedType::isDynamic(d2) || d1 == d2;
+}
+
+LogicalResult ReduceOp::verify() {
+  ArrayRef<int64_t> dimensionsRef = getDimensions();
+
+  for (int64_t i = 1; i < getNumInputs(); ++i) {
+    if (failed(mlir::verifyCompatibleShape(getInputs()[i].getType(),
+                                           getInputs()[0].getType()))) {
+      return emitOpError() << "expects all inputs to have compatible shapes. "
+                              "Shape at input-index "
+                           << i
+                           << " is not compatible with shape at input-index 0.";
+    }
+  }
+  for (int64_t i = 1; i < getNumOutputs(); ++i) {
+    if (failed(mlir::verifyCompatibleShape(getInits()[i].getType(),
+                                           getInits()[0].getType()))) {
+      return emitOpError()
+             << "expects all outputs to have compatible shapes. "
+                "Shape at output-index "
+             << i << " is not compatible with shape at output-index 0.";
+    }
+  }
+  auto inputType = getInputs()[0].getType().cast<ShapedType>();
+  auto initType = getInits()[0].getType().cast<ShapedType>();
+
+  DenseSet<int64_t> dimensionsToReduce;
+  int64_t lastDimension = -1;
+  for (int64_t dimension : dimensionsRef) {
+    if (dimension < 0 || dimension >= inputType.getRank()) {
+      return emitOpError()
+             << "dimensions for reduction should be in the range [0, "
+             << inputType.getRank() - 1 << "].";
+    }
+    if (dimension <= lastDimension) {
+      return emitOpError()
+             << "reduction dimensions are not in increasing order: "
+             << dimensionsRef;
+    }
+
+    lastDimension = dimension;
+    dimensionsToReduce.insert(dimension);
+  }
+
+  auto inputDims = inputType.getShape();
+  auto initDims = initType.getShape();
+
+  // Input dimensions that will be left after the reduction.
+  SmallVector<int64_t> reducedInputDims;
+  for (const auto &en : llvm::enumerate(inputDims)) {
+    if (!dimensionsToReduce.count(en.index()))
+      reducedInputDims.push_back(en.value());
+  }
+
+  if (reducedInputDims.size() != initType.getRank()) {
+    return emitOpError() << "number of dimensions after reduction "
+                         << reducedInputDims.size()
+                         << " doesn't match the init rank "
+                         << initType.getRank();
+  }
+
+  if (!all_of_zip(reducedInputDims, initDims, &dimensionsMatch))
+    return emitOpError() << "init dimensions [" << initDims
+                         << "] doesn't match input dimensions after reduction ["
+                         << reducedInputDims << "]";
+
+  Block *block = getBody();
+  if (static_cast<int64_t>(block->getArguments().size()) !=
+      getNumInputs() + getNumOutputs()) {
+    return emitOpError()
+           << "number of block arguments " << block->getArguments().size()
+           << " doesn't match the number of inputs plus the number of outputs "
+           << getNumInputs() + getNumOutputs();
+  }
+
+  // Check that the first block arguments match the element type of the inputs.
+  auto inputElementTypes =
+      llvm::to_vector<8>(llvm::map_range(getInputs().getTypes(), [](Type type) {
+        return type.cast<ShapedType>().getElementType();
+      }));
+  auto blockArgumentInputTypes = llvm::to_vector<8>(
+      llvm::map_range(block->getArguments().take_front(getNumInputs()),
+                      [](BlockArgument arg) { return arg.getType(); }));
+  if (blockArgumentInputTypes != inputElementTypes) {
+    return emitOpError() << "input element types " << inputElementTypes
+                         << " do not match block argument types "
+                         << blockArgumentInputTypes;
+  }
+
+  // Check that the last block arguments match the element type of the outputs.
+  auto outputElementTypes =
+      llvm::to_vector<8>(llvm::map_range(getInits().getTypes(), [](Type type) {
+        return type.cast<ShapedType>().getElementType();
+      }));
+  auto blockArgumentOutputTypes = llvm::to_vector<8>(
+      llvm::map_range(block->getArguments().take_back(getNumOutputs()),
+                      [](BlockArgument arg) { return arg.getType(); }));
+  if (blockArgumentOutputTypes != outputElementTypes) {
+    return emitOpError() << "output element types " << outputElementTypes
+                         << " do not match block argument types "
+                         << blockArgumentOutputTypes;
+  }
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // InitTensorOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -415,3 +415,175 @@
   }
   return
 }
+
+// -----
+
+func.func @reduce_input_vs_init_dimension_mismatch(
+    %input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  // expected-error @+1 {{'linalg.reduce' op init dimensions [16, 64] doesn't match input dimensions after reduction [16, 32]}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [2]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_dimensions_out_of_range(%input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  // expected-error @+1 {{'linalg.reduce' op dimensions for reduction should be in the range [0, 2].}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [3]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_duplicate_dimensions(%input: tensor<16x32x64xf32>,
+    %init: tensor<16xf32>) -> tensor<16xf32> {
+  // expected-error @+1 {{'linalg.reduce' op reduction dimensions are not in increasing order: 1, 1}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16xf32>)
+      dimensions = [1, 1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16xf32>
+}
+
+// -----
+
+func.func @reduce_non_increasing_dimensions(%input: tensor<16x32x64xf32>,
+    %init: tensor<16xf32>) -> tensor<16xf32> {
+  // expected-error @+1 {{'linalg.reduce' op reduction dimensions are not in increasing order: 2, 1}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16xf32>)
+      dimensions = [2, 1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16xf32>
+}
+
+// -----
+
+func.func @reduce_reduced_input_init_rank_mismatch(%input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  // expected-error @+1 {{'linalg.reduce' op number of dimensions after reduction 1 doesn't match the init rank 2}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [1, 2]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_wrong_number_of_block_arguments(
+    %input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<16x64xf32>) -> (tensor<16x64xf32>, tensor<16x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op number of block arguments 2 doesn't match the number of inputs plus the number of outputs 4}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+      dimensions = [1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_wrong_block_argument_input_type(
+    %input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<16x64xf32>) -> (tensor<16x64xf32>, tensor<16x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op input element types 'f32', 'f32' do not match block argument types 'f32', 'f64'}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f64, %out1: f32, %out2: f64) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addf %in2, %out2: f64
+        linalg.yield %0, %1: f32, f64
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_wrong_block_argument_output_type(
+    %input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<16x64xf32>) -> (tensor<16x64xf32>, tensor<16x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op output element types 'f32', 'f32' do not match block argument types 'f32', 'f64'}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f32, %out1: f32,
+       %out2: f64) {
+        %0 = arith.addf %in1, %out1: f32
+        linalg.yield %0, %out2: f32, f64
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_incompatible_input_shapes(%input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<17x32x64xf32>,
+    %init2: tensor<17x64xf32>) -> (tensor<16x64xf32>, tensor<17x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op expects all inputs to have compatible shapes. Shape at input-index 1 is not compatible with shape at input-index 0.}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<17x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addf %in2, %out2: f32
+        linalg.yield %0, %1: f32, f32
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<17x64xf32>
+}
+
+// -----
+
+func.func @reduce_incompatible_output_shapes(%input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<17x64xf32>) -> (tensor<16x64xf32>, tensor<17x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op expects all outputs to have compatible shapes. Shape at output-index 1 is not compatible with shape at output-index 0.}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addf %in2, %out2: f32
+        linalg.yield %0, %1: f32, f32
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<17x64xf32>
+}
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -366,3 +366,39 @@
 }
 // CHECK-LABEL: func @mixed_parallel_reduced_results
 // CHECK: linalg.generic
+
+// -----
+
+func.func @reduce(%input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+// CHECK-LABEL: func @reduce
+// CHECK: linalg.reduce
+
+// -----
+
+func.func @variadic_reduce(%input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xi64>,
+    %init2: tensor<16x64xi64>) -> (tensor<16x64xf32>, tensor<16x64xi64>) {
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xi64>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
+      dimensions = [1]
+      (%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addi %in2, %out2: i64
+        linalg.yield %0, %1: f32, i64
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xi64>
+}
+// CHECK-LABEL: func @variadic_reduce
+// CHECK: linalg.reduce
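Note for reviewers (not part of the patch): given the `iterator_types()` and `getIndexingMaps()` implementations above, the `@reduce` roundtrip test computes the same result as the `linalg.generic` below: the input keeps the identity indexing map, the init drops the reduced dimension d1, and d1 becomes the only reduction iterator. The map and value names (`#id3`, `#dropd1`, `%res`) are illustrative, not taken from the patch.

```
#id3    = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#dropd1 = affine_map<(d0, d1, d2) -> (d0, d2)>
// Reduce dimension 1 of %input into %init by addition.
%res = linalg.generic
    {indexing_maps = [#id3, #dropd1],
     iterator_types = ["parallel", "reduction", "parallel"]}
    ins(%input : tensor<16x32x64xf32>)
    outs(%init : tensor<16x64xf32>) {
  ^bb0(%in: f32, %out: f32):
    %0 = arith.addf %in, %out : f32
    linalg.yield %0 : f32
} -> tensor<16x64xf32>
```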