diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -42,9 +42,12 @@
 
 /// Creates a pass that promotes heap-based allocations to stack-based ones.
 /// Only buffers smaller than the provided size are promoted.
+/// Dynamically shaped buffers are promoted if their rank is below the
+/// given limit.
 std::unique_ptr<Pass>
 createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
-                                unsigned bitwidthOfIndexType = 64);
+                                unsigned bitwidthOfIndexType = 64,
+                                unsigned maxRankOfTensors = 2);
 
 /// Creates a pass that converts memref function results to out-params.
 std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -213,11 +213,13 @@
   let constructor = "mlir::createPromoteBuffersToStackPass()";
   let options = [
     Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
            /*default=*/"1024",
-           "Define the maximum size in bytes to promote allocations to stack.">,
+           "Maximal size in bytes to promote allocations to stack.">,
     Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
            /*default=*/"64",
-           "Define the bitwidth of the index type. Used for size estimation.">,
-
+           "Bitwidth of the index type. Used for size estimation.">,
+    Option<"maxRankOfTensors", "max-rank-of-tensors", "unsigned",
+           /*default=*/"2",
+           "Promote dynamically shaped buffers only below this rank.">,
   ];
 }
diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Transforms/BufferOptimizations.cpp
--- a/mlir/lib/Transforms/BufferOptimizations.cpp
+++ b/mlir/lib/Transforms/BufferOptimizations.cpp
@@ -30,10 +30,23 @@
 /// transformation is only applied to small buffers since large buffers could
 /// exceed the stack space.
 static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
-                         unsigned bitwidthOfIndexType) {
+                         unsigned bitwidthOfIndexType,
+                         unsigned maxRankOfTensors) {
   auto type = alloc.getType().dyn_cast<ShapedType>();
-  if (!type || !type.hasStaticShape())
+  if (!type)
     return false;
+  if (!type.hasStaticShape()) {
+    // Check whether the rank of the buffer is below the allowed maximum.
+    if (type.getRank() < maxRankOfTensors) {
+      // Promote only if all operands of the alloc are produced by RankOps.
+      return llvm::all_of(alloc.getDefiningOp()->getOperands(),
+                          [&](Value operand) {
+                            Operation *operandOp = operand.getDefiningOp();
+                            return operandOp && llvm::isa<RankOp>(operandOp);
+                          });
+    }
+    return false;
+  }
   // For index types, use the provided size, as the type does not know.
   unsigned int bitwidth = type.getElementType().isIndex()
                               ? bitwidthOfIndexType
@@ -286,7 +299,8 @@
     : BufferPlacementTransformationBase(op) {}
 
   /// Promote buffers to stack-based allocations.
-  void promote(unsigned maximumSize, unsigned bitwidthOfIndexType) {
+  void promote(unsigned maximumSize, unsigned bitwidthOfIndexType,
+               unsigned maxRankOfTensors) {
     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
       Value alloc = std::get<0>(entry);
       Operation *dealloc = std::get<1>(entry);
@@ -294,8 +308,9 @@
       // The transformation is done if the allocation is limited to a given
      // size. Furthermore, a deallocation must not be defined for this
       // allocation entry and a parent allocation scope must exist.
-      if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType) || dealloc ||
-          !hasAllocationScope(alloc, aliases))
+      if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType,
+                        maxRankOfTensors) ||
+          dealloc || !hasAllocationScope(alloc, aliases))
         continue;
 
       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
@@ -303,12 +318,13 @@
           alloc, alloc.getParentBlock(), liveness);
       // Build a new alloca that is associated with its parent
       // `AutomaticAllocationScope` determined during the initialization phase.
       OpBuilder builder(startOperation);
-      auto alloca = builder.create<AllocaOp>(
-          alloc.getLoc(), alloc.getType().cast<MemRefType>());
+      Operation *allocOp = alloc.getDefiningOp();
+      Operation *alloca = builder.create<AllocaOp>(
+          alloc.getLoc(), alloc.getType().cast<MemRefType>(),
+          allocOp->getOperands());
       // Replace the original alloc by a newly created alloca.
-      Operation *allocOp = alloc.getDefiningOp();
-      allocOp->replaceAllUsesWith(alloca.getOperation());
+      allocOp->replaceAllUsesWith(alloca);
       allocOp->erase();
     }
   }
@@ -347,15 +363,18 @@
 struct PromoteBuffersToStackPass
     : PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
 
   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
-                            unsigned bitwidthOfIndexType) {
+                            unsigned bitwidthOfIndexType,
+                            unsigned maxRankOfTensors) {
     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
     this->bitwidthOfIndexType = bitwidthOfIndexType;
+    this->maxRankOfTensors = maxRankOfTensors;
   }
 
   void runOnFunction() override {
     // Move all allocation nodes and convert candidates into allocas.
     BufferPlacementPromotion optimizer(getFunction());
-    optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType);
+    optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType,
+                      this->maxRankOfTensors);
   }
 };
@@ -371,7 +390,8 @@
 
 std::unique_ptr<Pass>
 mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
-                                      unsigned bitwidthOfIndexType) {
-  return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
-                                                     bitwidthOfIndexType);
+                                      unsigned bitwidthOfIndexType,
+                                      unsigned maxRankOfTensors) {
+  return std::make_unique<PromoteBuffersToStackPass>(
+      maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfTensors);
 }
diff --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir
--- a/mlir/test/Transforms/promote-buffers-to-stack.mlir
+++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir
@@ -1,6 +1,7 @@
 // RUN: mlir-opt -promote-buffers-to-stack -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix DEFINDEX
 // RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=128" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix BIGINDEX
 // RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=64" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix LOWLIMIT
+// RUN: mlir-opt -promote-buffers-to-stack="max-rank-of-tensors=3" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix RANK
 
 // This file checks the behavior of PromoteBuffersToStack pass for converting
 // AllocOps into AllocaOps, if possible.
@@ -14,8 +15,6 @@
 // PromoteBuffersToStack expected behavior: It should convert %0 into an
 // AllocaOp.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @condBranch
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cond_br %arg0, ^bb1, ^bb2
@@ -47,8 +46,6 @@
 // PromoteBuffersToStack expected behavior:
 // Since the alloc has dynamic type, it is not converted into an alloca.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @condBranchDynamicType
 func @condBranchDynamicType(
   %arg0: i1,
@@ -79,6 +76,90 @@
 
 // -----
 
+// Test Case: Alloc of dynamic shape whose size operand is a RankOp.
+// PromoteBuffersToStack expected behavior: The rank of the buffer is below
+// the default limit, so the alloc is converted into an alloca.
+
+// CHECK-LABEL: func @condBranchDynamicRanked
+func @condBranchDynamicRanked(
+  %arg0: i1,
+  %tensor: tensor<*xf32>) {
+  cond_br %arg0, ^bb1, ^bb2
+^bb1:
+  br ^bb3
+^bb2:
+  %0 = rank %tensor : tensor<*xf32>
+  %1 = alloc(%0) : memref<?xf32>
+  br ^bb3
+^bb3:
+  return
+}
+
+// CHECK-NEXT: cond_br
+// CHECK: ^bb2
+// CHECK: ^bb2
+// CHECK-NEXT: %[[RANK:.*]] = rank
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca(%[[RANK]])
+// CHECK: br ^bb3
+
+// -----
+
+// Test Case: Rank-2 alloc of dynamic shape whose size operands are RankOps.
+// PromoteBuffersToStack expected behavior: The alloc is only converted once
+// the rank limit is raised above two (see the RANK RUN line).
+
+// CHECK-LABEL: func @condBranchDynamicRanked2D
+func @condBranchDynamicRanked2D(
+  %arg0: i1,
+  %tensor: tensor<*xf32>) {
+  cond_br %arg0, ^bb1, ^bb2
+^bb1:
+  br ^bb3
+^bb2:
+  %0 = rank %tensor : tensor<*xf32>
+  %1 = alloc(%0, %0) : memref<?x?xf32>
+  br ^bb3
+^bb3:
+  return
+}
+
+// CHECK-NEXT: cond_br
+// CHECK: ^bb2
+// CHECK: ^bb2
+// CHECK-NEXT: %[[RANK:.*]] = rank
+// RANK-NEXT: %[[ALLOC:.*]] = alloca(%[[RANK]], %[[RANK]])
+// DEFINDEX-NEXT: %[[ALLOC:.*]] = alloc(%[[RANK]], %[[RANK]])
+// CHECK: br ^bb3
+
+// -----
+
+// Test Case: Alloc of dynamic shape whose size operand is a plain index
+// argument rather than a RankOp.
+// PromoteBuffersToStack expected behavior: The alloc is not converted.
+
+// CHECK-LABEL: func @condBranchDynamicNoRank
+func @condBranchDynamicNoRank(
+  %arg0: i1,
+  %arg1: index,
+  %tensor: tensor<*xf32>) {
+  cond_br %arg0, ^bb1, ^bb2
+^bb1:
+  br ^bb3
+^bb2:
+  %0 = alloc(%arg1) : memref<?xf32>
+  br ^bb3
+^bb3:
+  return
+}
+
+// CHECK-NEXT: cond_br
+// CHECK: ^bb2
+// CHECK: ^bb2
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc
+// CHECK: br ^bb3
+
+// -----
+
 // Test Case: Existing AllocOp with no users.
 // PromoteBuffersToStack expected behavior: It should convert it to an
 // AllocaOp.
@@ -102,8 +183,6 @@
 // PromoteBuffersToStack expected behavior: It should convert it into an
 // AllocaOp.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @criticalEdge
 func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
@@ -132,8 +211,6 @@
 // bb2
 // PromoteBuffersToStack expected behavior: It converts the alloc in an alloca.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @invCriticalEdge
 func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -161,8 +238,6 @@
 // bb3 <- Initial position of the second AllocOp
 // PromoteBuffersToStack expected behavior: It converts the allocs into allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @ifElse
 func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -198,8 +273,6 @@
 // bb3
 // PromoteBuffersToStack expected behavior: It converts the alloc into alloca.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @ifElseNoUsers
 func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -233,8 +306,6 @@
 // PromoteBuffersToStack expected behavior: The two allocs should be converted
 // into allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @ifElseNested
 func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -270,8 +341,6 @@
 // PromoteBuffersToStack expected behavior: It converts the two AllocOps into
 // allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @redundantOperations
 func @redundantOperations(%arg0: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -299,8 +368,6 @@
 // PromoteBuffersToStack expected behavior: Both AllocOps are converted into
 // allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
 func @moving_alloc_and_inserting_missing_dealloc(
   %cond: i1,
@@ -335,8 +402,6 @@
 // PromoteBuffersToStack expected behavior: The AllocOps are converted into
 // allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @nested_regions_and_cond_branch
 func @nested_regions_and_cond_branch(
   %arg0: i1,
@@ -373,8 +438,6 @@
 // there is no conversion allowed. The second alloc is converted, since it
 // only remains in the scope of the function.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @memref_in_function_results
 func @memref_in_function_results(
   %arg0: memref<5xf32>,
@@ -583,4 +646,5 @@
 // DEFINDEX-NEXT: alloca()
 // BIGINDEX-NEXT: alloca()
 // LOWLIMIT-NEXT: alloc()
+// RANK-NEXT: alloca()
 // CHECK-NEXT: return
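
For reviewers who want to exercise the new option outside of mlir-opt, below is a minimal C++ sketch. It assumes only the createPromoteBuffersToStackPass overload declared in this patch; the surrounding pipeline wiring (PassManager, addNestedPass, header paths) is illustrative of MLIR at the time of this change, and buildPromotionPipeline is a hypothetical helper, not part of the patch:

    #include "mlir/IR/Function.h"
    #include "mlir/Pass/PassManager.h"
    #include "mlir/Transforms/Passes.h"

    // Adds the promotion pass with a raised rank limit, mirroring the fourth
    // RUN line of the test file (max-rank-of-tensors=3).
    static void buildPromotionPipeline(mlir::PassManager &pm) {
      // PromoteBuffersToStack is a function pass, so nest it under FuncOp.
      pm.addNestedPass<mlir::FuncOp>(mlir::createPromoteBuffersToStackPass(
          /*maxAllocSizeInBytes=*/1024,
          /*bitwidthOfIndexType=*/64,
          // Ranks 0-2 qualify: a dynamically shaped alloc is promoted only if
          // its rank is below this value and its sizes come from rank ops.
          /*maxRankOfTensors=*/3));
    }

With these settings, the rank-2 alloc in @condBranchDynamicRanked2D above becomes an alloca, while the default limit of 2 leaves it as a heap allocation.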