diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -221,6 +221,67 @@
 }
 
+//===----------------------------------------------------------------------===//
+// Reduce op.
+//===----------------------------------------------------------------------===//
+
+def TensorOrMemref :
+  AnyTypeOf<[AnyMemRef, AnyRankedTensor], "", "::mlir::ShapedType">;
+
+def ReduceOp : LinalgStructuredBase_Op<"reduce", [
+    SameVariadicOperandSize, SingleBlockImplicitTerminator<"YieldOp">
+  ]> {
+  let summary = "Reduce operator";
+  let description = [{
+    Executes `combiner` on the `dimensions` of `inputs` and returns the
+    reduced result. The `dimensions` attribute needs to list the reduction
+    dimensions in increasing order.
+
+    Example:
+    ```
+      %reduce = linalg.reduce
+          ins(%input:tensor<16x32x64xf32>)
+          outs(%init:tensor<16x64xf32>)
+          dimensions = [1]
+          (%in: f32, %out: f32) {
+            %0 = arith.addf %in, %out: f32
+            linalg.yield %0: f32
+          }
+    ```
+  }];
+
+  let arguments = (ins
+    // Input arg
+    Variadic<TensorOrMemref>:$inputs,
+    // Output arg
+    Variadic<TensorOrMemref>:$inits,
+
+    DenseI64ArrayAttr:$dimensions
+  );
+  let results = (outs Variadic<TensorOrMemref>);
+  let regions = (region SizedRegion<1>:$combiner);
+
+  let extraClassDeclaration = structuredOpsBaseDecls # [{
+    // Declare functions necessary for LinalgStructuredInterface.
+    ArrayAttr iterator_types();
+    ArrayAttr getIndexingMaps();
+
+    // Implement functions necessary for DestinationStyleOpInterface.
+    mlir::ValueRange outputs() { return getInits(); }
+    unsigned getNumInputs() { return getInputs().size(); };
+    unsigned getNumOutputs() { return getInits().size(); };
+    static std::function<void(ImplicitLocOpBuilder &, Block &,
+                              ArrayRef<NamedAttribute>)>
+    getRegionBuilder() {
+      return nullptr;
+    }
+  }];
+
+  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
+}
+
+
 //===----------------------------------------------------------------------===//
 // Named Linalg ops, implemented as a declarative configurations of generic ops.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -124,7 +124,8 @@
 static ParseResult
 parseCommonStructuredOpParts(OpAsmParser &parser, OperationState &result,
                              SmallVectorImpl<Type> &inputTypes,
-                             SmallVectorImpl<Type> &outputTypes) {
+                             SmallVectorImpl<Type> &outputTypes,
+                             bool addOperandSegmentSizes = true) {
   SMLoc inputsOperandsLoc, outputsOperandsLoc;
   SmallVector<OpAsmParser::UnresolvedOperand, 4> inputsOperands,
       outputsOperands;
@@ -155,10 +156,12 @@
                              result.operands))
     return failure();
 
-  result.addAttribute("operand_segment_sizes",
-                      parser.getBuilder().getDenseI32ArrayAttr(
-                          {static_cast<int32_t>(inputsOperands.size()),
-                           static_cast<int32_t>(outputsOperands.size())}));
+  if (addOperandSegmentSizes) {
+    result.addAttribute("operand_segment_sizes",
+                        parser.getBuilder().getDenseI32ArrayAttr(
+                            {static_cast<int32_t>(inputsOperands.size()),
+                             static_cast<int32_t>(outputsOperands.size())}));
+  }
   return success();
 }
 
@@ -1180,6 +1183,234 @@
   return foldMemRefCast(*this);
 }
 
+//===----------------------------------------------------------------------===//
+// ReduceOp
+//===----------------------------------------------------------------------===//
+
+ArrayAttr ReduceOp::iterator_types() {
+  int64_t inputRank = getInputs()[0].getType().cast<ShapedType>().getRank();
+  SmallVector<StringRef> iteratorTypes(inputRank,
+                                       getParallelIteratorTypeName());
+  for (int64_t reductionDim : getDimensions())
+    iteratorTypes[reductionDim] = getReductionIteratorTypeName();
+  return Builder(getContext()).getStrArrayAttr(iteratorTypes);
+}
+
+ArrayAttr ReduceOp::getIndexingMaps() {
+  SmallVector<AffineMap> affineMaps;
+  int64_t inputRank = getInputs()[0].getType().cast<ShapedType>().getRank();
+  for (int64_t i = 0, e = getNumInputs(); i < e; ++i) {
+    affineMaps.push_back(
+        AffineMap::getMultiDimIdentityMap(inputRank, getContext()));
+  }
+  SmallVector<AffineExpr> exprs;
+  ArrayRef<int64_t> dimensionsRef = getDimensions();
+  for (int64_t i = 0, j = 0; i < inputRank; ++i) {
+    bool isReductionDim = j < dimensionsRef.size() && dimensionsRef[j] == i;
+    if (isReductionDim) {
+      ++j;
+    } else {
+      exprs.push_back(getAffineDimExpr(i, getContext()));
+    }
+  }
+  for (int64_t i = 0, e = getNumOutputs(); i < e; ++i) {
+    affineMaps.push_back(
+        AffineMap::get(inputRank, /*symbolCount=*/0, exprs, getContext()));
+  }
+  return Builder(getContext()).getAffineMapArrayAttr(affineMaps);
+}
+
+void ReduceOp::getEffects(
+    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+        &effects) {
+  SmallVector<Value> inputBuffers = getInputBufferOperands();
+  SmallVector<Value> outputBuffers = getOutputBufferOperands();
+  getGenericEffectsImpl(effects, getOperation()->getResults(), inputBuffers,
+                        outputBuffers);
+}
+
+static ParseResult parseDstStyleOp(
+    OpAsmParser &parser, OperationState &result,
+    function_ref<ParseResult(OpAsmParser &, NamedAttrList &)> parseAttrsFn =
+        nullptr) {
+  // Parse `ins` and `outs`.
+  SmallVector<Type, 4> inputTypes, outputTypes;
+  if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes,
+                                   /*addOperandSegmentSizes=*/false))
+    return failure();
+
+  // Add result types.
+  for (Type outputType : outputTypes) {
+    if (outputType.isa<RankedTensorType>())
+      result.addTypes(outputType);
+  }
+
+  // Parse required attributes.
+  if (parseAttrsFn && failed(parseAttrsFn(parser, result.attributes)))
+    return failure();
+
+  // Parse optional attributes.
+  if (parser.parseOptionalAttrDict(result.attributes))
+    return failure();
+  return success();
+}
+
+static ParseResult parseDenseI64ArrayAttr(OpAsmParser &parser,
+                                          NamedAttrList &attributes,
+                                          StringRef attributeName) {
+  if (parser.parseKeyword(attributeName) || parser.parseEqual())
+    return failure();
+
+  attributes.set(attributeName, DenseI64ArrayAttr::parse(parser, Type{}));
+  return success();
+}
+
+ParseResult ReduceOp::parse(OpAsmParser &parser, OperationState &result) {
+  if (parseDstStyleOp(
+          parser, result, [&](OpAsmParser &parser, NamedAttrList &attributes) {
+            return parseDenseI64ArrayAttr(parser, attributes, "dimensions");
+          }))
+    return failure();
+
+  SmallVector<OpAsmParser::Argument> regionArgs;
+  if (parser.parseArgumentList(regionArgs, OpAsmParser::Delimiter::Paren,
+                               /*allowType=*/true, /*allowAttrs=*/true)) {
+    return failure();
+  }
+
+  Region *body = result.addRegion();
+  if (parser.parseRegion(*body, regionArgs))
+    return failure();
+
+  return success();
+}
+
+static void printDenseI64ArrayAttr(OpAsmPrinter &p, StringRef attributeName,
+                                   ArrayRef<int64_t> attributeValue) {
+  p << " " << attributeName << " = [" << attributeValue << "] ";
+}
+
+void ReduceOp::print(OpAsmPrinter &p) {
+  printCommonStructuredOpParts(p, getInputs(), outputs());
+  printDenseI64ArrayAttr(p, getDimensionsAttrName(), getDimensions());
+  p.printOptionalAttrDict((*this)->getAttrs(), {getDimensionsAttrName()});
+
+  p << "(";
+  llvm::interleaveComma(getCombiner().getArguments(), p,
+                        [&](auto arg) { p.printRegionArgument(arg); });
+  p << ") ";
+
+  p.printRegion(getCombiner(), /*printEntryBlockArgs=*/false);
+}
+
+static bool dimensionsMatch(int64_t d1, int64_t d2) {
+  return ShapedType::isDynamic(d1) || ShapedType::isDynamic(d2) || d1 == d2;
+}
+
+LogicalResult ReduceOp::verify() {
+  ArrayRef<int64_t> dimensionsRef = getDimensions();
+
+  for (int64_t i = 1; i < getNumInputs(); ++i) {
+    if (failed(mlir::verifyCompatibleShape(getInputs()[i].getType(),
+                                           getInputs()[0].getType()))) {
+      return emitOpError() << "expects all inputs to have compatible shapes. "
+                              "Shape at input-index "
+                           << i
+                           << " is not compatible with shape at input-index 0.";
+    }
+  }
+  for (int64_t i = 1; i < getNumOutputs(); ++i) {
+    if (failed(mlir::verifyCompatibleShape(getInits()[i].getType(),
+                                           getInits()[0].getType()))) {
+      return emitOpError()
+             << "expects all outputs to have compatible shapes. "
+                "Shape at output-index "
+             << i << " is not compatible with shape at output-index 0.";
+    }
+  }
+  auto inputType = getInputs()[0].getType().cast<ShapedType>();
+  auto initType = getInits()[0].getType().cast<ShapedType>();
+
+  DenseSet<int64_t> dimensionsToReduce;
+  int64_t lastDimension = -1;
+  for (int64_t dimension : dimensionsRef) {
+    if (dimension < 0 || dimension >= inputType.getRank()) {
+      return emitOpError()
+             << "dimensions for reduction should be in the range [0, "
+             << inputType.getRank() - 1 << "].";
+    }
+    if (dimension <= lastDimension) {
+      return emitOpError()
+             << "reduction dimensions are not in increasing order: "
+             << dimensionsRef;
+    }
+
+    lastDimension = dimension;
+    dimensionsToReduce.insert(dimension);
+  }
+
+  auto inputDims = inputType.getShape();
+  auto initDims = initType.getShape();
+
+  // Input dimensions that will be left after the reduction.
+  SmallVector<int64_t> reducedInputDims;
+  for (const auto &en : llvm::enumerate(inputDims)) {
+    if (!dimensionsToReduce.count(en.index()))
+      reducedInputDims.push_back(en.value());
+  }
+
+  if (reducedInputDims.size() != initType.getRank()) {
+    return emitOpError() << "number of dimensions after reduction "
+                         << reducedInputDims.size()
+                         << " doesn't match the init rank "
+                         << initType.getRank();
+  }
+
+  if (!all_of_zip(reducedInputDims, initDims, &dimensionsMatch))
+    return emitOpError() << "init dimensions [" << initDims
+                         << "] doesn't match input dimensions after reduction ["
+                         << reducedInputDims << "]";
+
+  Block *block = getBody();
+  if (static_cast<int64_t>(block->getArguments().size()) !=
+      getNumInputs() + getNumOutputs()) {
+    return emitOpError()
+           << "number of block arguments " << block->getArguments().size()
+           << " doesn't match the number of inputs plus the number of outputs "
+           << getNumInputs() + getNumOutputs();
+  }
+
+  // Check that the first block arguments match the element type of the inputs.
+  auto inputElementTypes =
+      llvm::to_vector<8>(llvm::map_range(getInputs().getTypes(), [](Type type) {
+        return type.cast<ShapedType>().getElementType();
+      }));
+  auto blockArgumentInputTypes = llvm::to_vector<8>(
+      llvm::map_range(block->getArguments().take_front(getNumInputs()),
+                      [](BlockArgument arg) { return arg.getType(); }));
+  if (blockArgumentInputTypes != inputElementTypes) {
+    return emitOpError() << "input element types " << inputElementTypes
+                         << " do not match block argument types "
+                         << blockArgumentInputTypes;
+  }
+
+  // Check that the last block arguments match the element type of the outputs.
+  auto outputElementTypes =
+      llvm::to_vector<8>(llvm::map_range(getInits().getTypes(), [](Type type) {
+        return type.cast<ShapedType>().getElementType();
+      }));
+  auto blockArgumentOutputTypes = llvm::to_vector<8>(
+      llvm::map_range(block->getArguments().take_back(getNumOutputs()),
+                      [](BlockArgument arg) { return arg.getType(); }));
+  if (blockArgumentOutputTypes != outputElementTypes) {
+    return emitOpError() << "output element types " << outputElementTypes
+                         << " do not match block argument types "
+                         << blockArgumentOutputTypes;
+  }
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // InitTensorOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -415,3 +415,175 @@
   }
   return
 }
+
+// -----
+
+func.func @reduce_input_vs_init_dimension_mismatch(
+    %input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  // expected-error @+1 {{'linalg.reduce' op init dimensions [16, 64] doesn't match input dimensions after reduction [16, 32]}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [2]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_dimensions_out_of_range(%input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  // expected-error @+1 {{'linalg.reduce' op dimensions for reduction should be in the range [0, 2].}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [3]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_duplicate_dimensions(%input: tensor<16x32x64xf32>,
+    %init: tensor<16xf32>) -> tensor<16xf32> {
+  // expected-error @+1 {{'linalg.reduce' op reduction dimensions are not in increasing order: 1, 1}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16xf32>)
+      dimensions = [1, 1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16xf32>
+}
+
+// -----
+
+func.func @reduce_non_increasing_dimensions(%input: tensor<16x32x64xf32>,
+    %init: tensor<16xf32>) -> tensor<16xf32> {
+  // expected-error @+1 {{'linalg.reduce' op reduction dimensions are not in increasing order: 2, 1}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16xf32>)
+      dimensions = [2, 1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16xf32>
+}
+
+// -----
+
+func.func @reduce_reduced_input_init_rank_mismatch(%input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  // expected-error @+1 {{'linalg.reduce' op number of dimensions after reduction 1 doesn't match the init rank 2}}
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [1, 2]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_wrong_number_of_block_arguments(
+    %input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<16x64xf32>) -> (tensor<16x64xf32>, tensor<16x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op number of block arguments 2 doesn't match the number of inputs plus the number of outputs 4}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+      dimensions = [1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_wrong_block_argument_input_type(
+    %input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<16x64xf32>) -> (tensor<16x64xf32>, tensor<16x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op input element types 'f32', 'f32' do not match block argument types 'f32', 'f64'}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f64, %out1: f32, %out2: f64) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addf %in2, %out2: f64
+        linalg.yield %0, %1: f32, f64
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_wrong_block_argument_output_type(
+    %input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<16x64xf32>) -> (tensor<16x64xf32>, tensor<16x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op output element types 'f32', 'f32' do not match block argument types 'f32', 'f64'}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f32, %out1: f32,
+       %out2: f64) {
+        %0 = arith.addf %in1, %out1: f32
+        linalg.yield %0, %out2: f32, f64
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xf32>
+}
+
+// -----
+
+func.func @reduce_incompatible_input_shapes(%input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<17x32x64xf32>,
+    %init2: tensor<17x64xf32>) -> (tensor<16x64xf32>, tensor<17x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op expects all inputs to have compatible shapes. Shape at input-index 1 is not compatible with shape at input-index 0.}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<17x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addf %in2, %out2: f32
+        linalg.yield %0, %1: f32, f32
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<17x64xf32>
+}
+
+// -----
+
+func.func @reduce_incompatible_output_shapes(%input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xf32>,
+    %init2: tensor<17x64xf32>) -> (tensor<16x64xf32>, tensor<17x64xf32>) {
+  // expected-error @+1{{'linalg.reduce' op expects all outputs to have compatible shapes. Shape at output-index 1 is not compatible with shape at output-index 0.}}
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+      dimensions = [1]
+      (%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addf %in2, %out2: f32
+        linalg.yield %0, %1: f32, f32
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<17x64xf32>
+}
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -366,3 +366,39 @@
 }
 // CHECK-LABEL: func @mixed_parallel_reduced_results
 // CHECK: linalg.generic
+
+// -----
+
+func.func @reduce(%input: tensor<16x32x64xf32>,
+    %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
+  %reduce = linalg.reduce
+      ins(%input:tensor<16x32x64xf32>)
+      outs(%init:tensor<16x64xf32>)
+      dimensions = [1]
+      (%in: f32, %out: f32) {
+        %0 = arith.addf %in, %out: f32
+        linalg.yield %0: f32
+      }
+  func.return %reduce : tensor<16x64xf32>
+}
+// CHECK-LABEL: func @reduce
+// CHECK: linalg.reduce
+
+// -----
+
+func.func @variadic_reduce(%input1: tensor<16x32x64xf32>,
+    %init1: tensor<16x64xf32>, %input2: tensor<16x32x64xi64>,
+    %init2: tensor<16x64xi64>) -> (tensor<16x64xf32>, tensor<16x64xi64>) {
+  %reduce, %reduce2 = linalg.reduce
+      ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xi64>)
+      outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
+      dimensions = [1]
+      (%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
+        %0 = arith.addf %in1, %out1: f32
+        %1 = arith.addi %in2, %out2: i64
+        linalg.yield %0, %1: f32, i64
+      }
+  func.return %reduce, %reduce2 : tensor<16x64xf32>, tensor<16x64xi64>
+}
+// CHECK-LABEL: func @variadic_reduce
+// CHECK: linalg.reduce
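Note for reviewers (not part of the patch): given the `iterator_types()` and `getIndexingMaps()` implementations above, the `@reduce` roundtrip test computes the same result as the `linalg.generic` below: the input keeps the identity indexing map, the init drops the reduced dimension d1, and d1 becomes the only reduction iterator. The map and value names (`#id3`, `#dropd1`, `%res`) are illustrative, not taken from the patch.

```
#id3    = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#dropd1 = affine_map<(d0, d1, d2) -> (d0, d2)>
// Reduce dimension 1 of %input into %init by addition.
%res = linalg.generic
    {indexing_maps = [#id3, #dropd1],
     iterator_types = ["parallel", "reduction", "parallel"]}
    ins(%input : tensor<16x32x64xf32>)
    outs(%init : tensor<16x64xf32>) {
  ^bb0(%in: f32, %out: f32):
    %0 = arith.addf %in, %out : f32
    linalg.yield %0 : f32
} -> tensor<16x64xf32>
```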