diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h
--- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h
@@ -55,7 +55,8 @@
 /// Returns true if `forOp' doesn't have memory dependences preventing
 /// parallelization. This function doesn't check iter_args and should be used
-/// only as a building block for full parallel-checking functions.
+/// only as a building block for full parallel-checking functions. Memrefs that
+/// are allocated inside `forOp` do not impact its dependences and parallelism.
 bool isLoopMemoryParallel(AffineForOp forOp);
 
 /// Returns in `affineApplyOps`, the sequence of those AffineApplyOp
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
--- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -17,13 +17,12 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Support/MathExtras.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -115,6 +114,40 @@
   return isLoopMemoryParallel(forOp);
 }
 
+/// Returns true if `op` is an alloc-like op, i.e., one allocating memrefs.
+static bool isAllocLikeOp(Operation *op) {
+  auto memEffects = dyn_cast<MemoryEffectOpInterface>(op);
+  return memEffects && memEffects.hasEffect<MemoryEffects::Allocate>();
+}
+
+/// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
+/// allocated by an operation nested within `enclosingOp`.
+static bool isLocallyDefined(Value v, Operation *enclosingOp) {
+  Operation *defOp = v.getDefiningOp();
+  if (!defOp)
+    return false;
+
+  if (isAllocLikeOp(defOp) && enclosingOp->isProperAncestor(defOp))
+    return true;
+
+  // Aliasing ops.
+  // TODO: cover all known ops via an appropriate op interface. Common ones
+  // listed here for now.
+  if (auto castOp = dyn_cast<memref::CollapseShapeOp>(defOp))
+    return isLocallyDefined(castOp.src(), enclosingOp);
+  if (auto castOp = dyn_cast<memref::ExpandShapeOp>(defOp))
+    return isLocallyDefined(castOp.src(), enclosingOp);
+  if (auto castOp = dyn_cast<memref::CastOp>(defOp))
+    return isLocallyDefined(castOp.source(), enclosingOp);
+  if (auto castOp = dyn_cast<memref::SubViewOp>(defOp))
+    return isLocallyDefined(castOp.source(), enclosingOp);
+  if (auto castOp = dyn_cast<memref::ViewOp>(defOp))
+    return isLocallyDefined(castOp.source(), enclosingOp);
+  if (auto castOp = dyn_cast<memref::ReinterpretCastOp>(defOp))
+    return isLocallyDefined(castOp.source(), enclosingOp);
+  return false;
+}
+
 /// Returns true if `forOp' doesn't have memory dependences preventing
 /// parallelization. This function doesn't check iter_args and should be used
 /// only as a building block for full parallel-checking functions.
@@ -122,11 +155,25 @@
   // Collect all load and store ops in loop nest rooted at 'forOp'.
   SmallVector<Operation *, 8> loadAndStoreOps;
   auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
-    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
-      loadAndStoreOps.push_back(op);
-    else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
-             !MemoryEffectOpInterface::hasNoEffect(op))
+    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
+      // Memrefs that are allocated inside `forOp` need not be considered. Note
+      // that such local memrefs may still escape or could be passed around as
+      // iter args but checking for that is, strictly speaking, the concern of
+      // `isLoopParallel`: it's not a memory dependence.
+      if (!isLocallyDefined(readOp.getMemRef(), forOp))
+        loadAndStoreOps.push_back(op);
+    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
+      // Filter out stores the same way as above.
+      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
+        loadAndStoreOps.push_back(op);
+    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
+               !isAllocLikeOp(op) &&
+               !MemoryEffectOpInterface::hasNoEffect(op)) {
+      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
+      // as long as they don't escape the loop. When they escape or cross loop
+      // iterations, it's still not a memory dependence.
       return WalkResult::interrupt();
+    }
     return WalkResult::advance();
   });
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -269,3 +269,46 @@
   }
   return
 }
+
+// Test in the presence of locally allocated memrefs.
+
+// CHECK: func @local_alloc
+func @local_alloc() {
+  %cst = arith.constant 0.0 : f32
+  affine.for %i = 0 to 100 {
+    %m = memref.alloc() : memref<1xf32>
+    %ma = memref.alloca() : memref<1xf32>
+    affine.store %cst, %m[0] : memref<1xf32>
+  }
+  // CHECK: affine.parallel
+  return
+}
+
+// CHECK: func @local_alloc_cast
+func @local_alloc_cast() {
+  %cst = arith.constant 0.0 : f32
+  affine.for %i = 0 to 100 {
+    %m = memref.alloc() : memref<128xf32>
+    affine.for %j = 0 to 128 {
+      affine.store %cst, %m[%j] : memref<128xf32>
+    }
+    affine.for %j = 0 to 128 {
+      affine.store %cst, %m[0] : memref<128xf32>
+    }
+    %r = memref.reinterpret_cast %m to offset: [0], sizes: [8, 16],
+      strides: [16, 1] : memref<128xf32> to memref<8x16xf32>
+    affine.for %j = 0 to 8 {
+      affine.store %cst, %r[%j, %j] : memref<8x16xf32>
+    }
+  }
+  // CHECK: affine.parallel
+  // CHECK: affine.parallel
+  // CHECK: }
+  // CHECK: affine.for
+  // CHECK: }
+  // CHECK: affine.parallel
+  // CHECK: }
+  // CHECK: }
+
+  return
+}