diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h
--- a/mlir/include/mlir/IR/AffineMap.h
+++ b/mlir/include/mlir/IR/AffineMap.h
@@ -327,6 +327,21 @@
 /// ```
 AffineMap concatAffineMaps(ArrayRef<AffineMap> maps);
 
+/// Returns the map that results from projecting out the dimensions specified
+/// in `projectedDimensions`. The projected dimensions are set to 0.
+///
+/// Example:
+/// 1) map                  : affine_map<(d0, d1, d2) -> (d0, d1)>
+///    projected_dimensions : {2}
+///    result               : affine_map<(d0, d1) -> (d0, d1)>
+///
+/// 2) map                  : affine_map<(d0, d1) -> (d0 + d1)>
+///    projected_dimensions : {1}
+///    result               : affine_map<(d0) -> (d0)>
+///
+/// 3) map                  : affine_map<(d0, d1, d2) -> (d0, d1)>
+///    projected_dimensions : {1}
+///    result               : affine_map<(d0, d1) -> (d0, 0)>
 AffineMap getProjectedMap(AffineMap map,
                           ArrayRef<unsigned> projectedDimensions);
 
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1938,6 +1938,11 @@
       newOperands.push_back(fold ? tensorCastOp.getOperand() : v);
       newResultTypes.push_back(newOperands.back().getType());
     }
+    if (linalgOp.getNumInitTensors() == 0) {
+      for (Value v : linalgOp.getOperation()->getResults()) {
+        newResultTypes.push_back(v.getType());
+      }
+    }
     auto extraOperands = linalgOp.getAssumedNonShapedOperands();
     newOperands.append(extraOperands.begin(), extraOperands.end());
     // Clone op.
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/SCF/EDSC/Builders.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/AffineMap.h"
@@ -220,9 +221,8 @@
 static SmallVector<Value, 4>
 makeTiledShapes(OpBuilder &b, Location loc, LinalgOp linalgOp,
-                ValueRange operands, AffineMap map, ValueRange ivs,
+                ArrayRef<Value> tiledOperands, AffineMap map, ValueRange ivs,
                 ValueRange tileSizes, ValueRange allShapeSizes) {
-  assert(operands.size() == linalgOp.getShapedOperands().size());
   assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                            llvm::make_range(tileSizes.begin(), tileSizes.end()),
                            [](Value v) { return !isZero(v); })) &&
@@ -242,11 +242,9 @@
     subShapeSizes.push_back(size - std_constant_index(1));
   }
 
-  auto *op = linalgOp.getOperation();
-
   SmallVector<Value, 4> res;
-  res.reserve(op->getNumOperands());
-  for (auto en : llvm::enumerate(operands)) {
+  res.reserve(tiledOperands.size());
+  for (auto en : llvm::enumerate(tiledOperands)) {
     Value shapedOp = en.value();
     ShapedType shapedType = shapedOp.getType().cast<ShapedType>();
     unsigned rank = shapedType.getRank();
@@ -313,6 +311,40 @@
   return res;
 }
 
+/// Linalg operations on tensors might not have an `init` tensor if they don't
+/// have reduction semantics. Tiling, however, requires an init tensor, since
+/// it is the tensor into which the subtensor created by the body of the tiled
+/// loop is inserted. This struct provides a uniform interface to reason about
+/// init tensors during tiling.
+struct TiledOp {
+  TiledOp(OpBuilder &b, LinalgOp op) : op(op) {
+    if (op.getOperation()->getNumResults() != 0 &&
+        op.getNumInitTensors() == 0) {
+      for (Value result : op.getOperation()->getResults()) {
+        ShapedType resultType = result.getType().cast<ShapedType>();
+        SmallVector<Value, 4> shape = llvm::to_vector<4>(
+            llvm::map_range(llvm::seq<int64_t>(0, resultType.getRank()),
+                            [&](int64_t v) -> Value {
+                              return b.create<DimOp>(op.getLoc(), result, v);
+                            }));
+        initTensors.push_back(b.create<linalg::InitTensorOp>(
+            op.getLoc(), shape, resultType.getElementType()));
+      }
+    } else {
+      initTensors = llvm::to_vector<1>(op.getInitTensors());
+    }
+  }
+  SmallVector<Value, 4> getTiledOperands() {
+    SmallVector<Value, 4> tiledOperands =
+        llvm::to_vector<4>(op.getShapedOperands());
+    if (op.getNumInitTensors() == 0)
+      tiledOperands.append(initTensors.begin(), initTensors.end());
+    return tiledOperands;
+  }
+  LinalgOp op;
+  SmallVector<Value, 1> initTensors;
+};
+
 template <typename LoopTy>
 static Optional<TiledLinalgOp>
 tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
@@ -341,6 +373,7 @@
   LoopIndexToRangeIndexMap loopIndexToRangeIndex;
   std::tie(loopRanges, loopIndexToRangeIndex) = makeTiledLoopRanges(
       b, op.getLoc(), shapeSizesToLoopsMap, allShapeSizes, tileSizes);
+
   SmallVector<Attribute, 4> iteratorTypes;
   for (auto attr :
        enumerate(op.iterator_types().cast<ArrayAttr>().getValue())) {
@@ -374,9 +407,9 @@
   // 2. Create the tiled loops.
   LinalgOp res = op;
   SmallVector<Value, 4> ivs, tensorResults;
-  auto initTensors = op.getInitTensors();
+  TiledOp tiledOp(b, op);
   GenerateLoopNest<LoopTy>::doit(
-      loopRanges, /*iterArgInitValues*/ initTensors, iteratorTypes,
+      loopRanges, /*iterArgInitValues*/ tiledOp.initTensors, iteratorTypes,
       [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector {
         auto &b = ScopedContext::getBuilderRef();
         auto loc = ScopedContext::getLocation();
@@ -391,47 +424,42 @@
         else
           interchangedIvs.assign(ivs.begin(), ivs.end());
 
-        assert(op.getNumInitTensors() == iterArgs.size() &&
+        assert((tiledOp.initTensors.size() == iterArgs.size()) &&
                "num init tensors must match number of loop iter arguments");
-        // This uses knowledge about position of the init tensor in the list
-        // of operands.
-        auto operands = llvm::to_vector<4>(op.getShapedOperands());
+        // Replace the init tensors of the original operands with the arguments
+        // of the loop to get the init tensors of the tiled operation.
+        // This uses knowledge about position of the init tensor in the list of
+        // operands.
+        SmallVector<Value, 4> tiledOperands = tiledOp.getTiledOperands();
         std::copy(iterArgs.begin(), iterArgs.end(),
-                  operands.begin() + op.getNumInputsAndOutputBuffers());
+                  tiledOperands.begin() + op.getNumInputsAndOutputBuffers());
 
-        SmallVector<Value, 4> tiledOperands =
-            makeTiledShapes(b, loc, op, operands, shapeSizesToLoopsMap,
+        SmallVector<Value, 4> tiledValues =
+            makeTiledShapes(b, loc, op, tiledOperands, shapeSizesToLoopsMap,
                             interchangedIvs, tileSizes, allShapeSizes);
-        auto nonShapedOperands = op.getAssumedNonShapedOperands();
-        tiledOperands.append(nonShapedOperands.begin(),
-                             nonShapedOperands.end());
-
-        // If LinalgOp has results, they must all be tied to init tensors.
-        // We enforce this to ensure all tiled ops have been rewritten in
-        // "init tensor" form. This ensures tiling has anchor values into which
-        // to subtensor / subtensor_insert. Otherwise tiling would need to
-        // allocate which is not acceptable.
-        // This would not be the case with a special terminator op that
-        // generates the whole tensor (instead of inserting a subtensor). But
-        // the generator-based abstraction has other issues.
-        assert(op.getNumInitTensors() == op->getNumResults() &&
-               "expected same number of init tensors as number of results");
 
         // Handle init tensor operands.
         // This uses knowledge about position of the init tensor in the list
         // of operands.
         // TODO: InterfaceAdaptor ?
         SmallVector<Type, 4> resultTensorTypes;
-        for (auto idx : llvm::seq<unsigned>(0, op.getNumInitTensors()))
+        for (auto idx :
+             llvm::seq<unsigned>(0, op.getOperation()->getNumResults()))
           resultTensorTypes.push_back(
-              tiledOperands[op.getNumInputsAndOutputBuffers() + idx].getType());
+              tiledValues[op.getNumInputsAndOutputBuffers() + idx].getType());
 
-        res = op.clone(b, loc, resultTensorTypes, tiledOperands);
+        SmallVector<Value, 4> clonedOpOperands = llvm::to_vector<4>(
+            ArrayRef<Value>(tiledValues).take_front(op.getNumShapedOperands()));
+        auto nonShapedOperands = op.getAssumedNonShapedOperands();
+        clonedOpOperands.append(nonShapedOperands.begin(),
+                                nonShapedOperands.end());
+        res = op.clone(b, loc, resultTensorTypes, clonedOpOperands);
 
         // Insert a subtensor_insert for each init subtensor.
-        for (unsigned idx = 0, e = op.getNumInitTensors(); idx != e; ++idx) {
+        for (unsigned idx = 0, e = tiledOp.initTensors.size(); idx != e;
+             ++idx) {
           Value initTensor =
-              tiledOperands[op.getNumInputsAndOutputBuffers() + idx];
+              tiledValues[op.getNumInputsAndOutputBuffers() + idx];
           if (auto subtensor = initTensor.getDefiningOp<SubTensorOp>()) {
             tensorResults.push_back(b.create<SubTensorInsertOp>(
                 loc, subtensor.source().getType(), res->getResult(idx),
@@ -581,10 +609,10 @@
 static void insertTilingPatterns(OwningRewritePatternList &patterns,
                                  const LinalgTilingOptions &options,
                                  MLIRContext *ctx) {
-  RewritePatternList<
+  RewritePatternList<GenericOp, IndexedGenericOp,
 #define GET_OP_LIST
 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
-      >::insert(patterns, options, ctx);
+                     >::insert(patterns, options, ctx);
 }
 
 static void applyTilingToLoopPatterns(LinalgTilingLoopType loopType,
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -125,15 +125,8 @@
   if (failed(marker.checkAndNotify(rewriter, linalgOp)))
     return failure();
 
-  // If LinalgOp has results, they must all be tied to init tensors.
-  // We enforce this to ensure all tiled ops have been rewritten in
-  // "init tensor" form. This ensures tiling has anchor values into which to
-  // subtensor / subtensor_insert. Otherwise tiling would need to allocate which
-  // is not acceptable.
-  // This would not be the case with a special terminator op that generates the
-  // whole tensor (instead of inserting a subtensor). But the generator-based
-  // abstraction has other issues.
-  if (linalgOp.getNumInitTensors() != linalgOp->getNumResults())
+  if (linalgOp.getNumInitTensors() != 0 &&
+      linalgOp.getNumInitTensors() != linalgOp->getNumResults())
     return failure();
 
   Optional<TiledLinalgOp> res = tileLinalgOp(rewriter, linalgOp, options);
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @matmul_tensors(
 // CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
@@ -26,3 +26,40 @@
 // CHECK: return %[[TD0]] : tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
+
+// -----
+
+func @generic_op_tensors(
+  %arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
+  %0 = linalg.generic
+    {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
+                      affine_map<(d0, d1, d2) -> (d0, d2, d1)>,
+                      affine_map<(d0, d1, d2) -> (d2, d1, d0)>],
+     iterator_types = ["parallel", "parallel", "parallel"]}
+    ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
+    ^bb0(%arg2 : f32, %arg3: f32):
+      %1 = addf %arg2, %arg3 : f32
+      linalg.yield %1 : f32
+    } -> tensor<?x?x?xf32>
+  return %0 : tensor<?x?x?xf32>
+}
+
+// CHECK-LABEL: func @generic_op_tensors
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+// CHECK: %[[INIT:.+]] = linalg.init_tensor
+// CHECK: %[[TD0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC0:.+]] = %[[INIT]]) -> (tensor<?x?x?xf32>) {
+// CHECK: %[[TD1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC1:.+]] = %[[TC0]]) -> (tensor<?x?x?xf32>) {
+// CHECK: %[[TD2:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC2:.+]] = %[[TC1]]) -> (tensor<?x?x?xf32>) {
+// CHECK: %[[STARG0:.+]] = subtensor %[[ARG0]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+// CHECK: %[[STARG1:.+]] = subtensor %[[ARG1]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+// CHECK: %[[STRETURN:.+]] = linalg.generic
+// CHECK-SAME: ins(%[[STARG0]], %[[STARG1]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
+// CHECK: %[[TD:.+]] = subtensor_insert %[[STRETURN]] into %[[TC2]]
+// CHECK: scf.yield %[[TD]]
+// CHECK: }
+// CHECK: scf.yield %[[TD2]]
+// CHECK: }
+// CHECK: scf.yield %[[TD1]]
+// CHECK: }
+// CHECK: return %[[TD0]]
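
For readers skimming the patch: the new `generic_op_tensors` test above encodes the behavior this change enables. When a `linalg.generic` with tensor results has no init tensor, the `TiledOp` helper materializes one (via `dim` plus `linalg.init_tensor`) and threads it through the tiled loop nest as `iter_args`, writing each tile back with `subtensor_insert`. The following is a hand-written sketch of that shape, not compiler output; the function name, SSA names, elided inner loops, and subtensor offsets are illustrative only.

func @tiled_generic_sketch(%A: tensor<?x?x?xf32>, %B: tensor<?x?x?xf32>)
    -> tensor<?x?x?xf32> {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %d0 = dim %A, %c0 : tensor<?x?x?xf32>
  %d1 = dim %A, %c1 : tensor<?x?x?xf32>
  %d2 = dim %A, %c2 : tensor<?x?x?xf32>
  // The missing init tensor is materialized from the result shape.
  %init = linalg.init_tensor [%d0, %d1, %d2] : tensor<?x?x?xf32>
  // Outer tiled loop (tile size 2); the init tensor flows through iter_args.
  // The two inner loops (tile sizes 3 and 4) follow the same pattern.
  %res = scf.for %iv = %c0 to %d0 step %c2 iter_args(%acc = %init)
      -> (tensor<?x?x?xf32>) {
    // Body: take subtensors of %A and %B, run the tiled linalg.generic, and
    // insert the resulting tile into %acc via subtensor_insert before yielding.
    scf.yield %acc : tensor<?x?x?xf32>
  }
  return %res : tensor<?x?x?xf32>
}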