diff --git a/mlir/include/mlir/Dialect/Async/Passes.td b/mlir/include/mlir/Dialect/Async/Passes.td
--- a/mlir/include/mlir/Dialect/Async/Passes.td
+++ b/mlir/include/mlir/Dialect/Async/Passes.td
@@ -27,9 +27,9 @@
            "int32_t", /*default=*/"8",
            "The number of available workers to execute async operations.">,
-    Option<"targetBlockSize", "target-block-size",
+    Option<"minTaskSize", "min-task-size",
            "int32_t", /*default=*/"1000",
-           "The target block size for sharding parallel operation.">
+           "The minimum task size for sharding parallel operation.">
   ];
 
   let dependentDialects = ["async::AsyncDialect", "scf::SCFDialect"];
 }
diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp
--- a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp
+++ b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp
@@ -92,10 +92,10 @@
   AsyncParallelForPass() = default;
 
   AsyncParallelForPass(bool asyncDispatch, int32_t numWorkerThreads,
-                       int32_t targetBlockSize) {
+                       int32_t minTaskSize) {
     this->asyncDispatch = asyncDispatch;
     this->numWorkerThreads = numWorkerThreads;
-    this->targetBlockSize = targetBlockSize;
+    this->minTaskSize = minTaskSize;
   }
 
   void runOnOperation() override;
@@ -104,9 +104,9 @@
 struct AsyncParallelForRewrite : public OpRewritePattern<scf::ParallelOp> {
 public:
   AsyncParallelForRewrite(MLIRContext *ctx, bool asyncDispatch,
-                          int32_t numWorkerThreads, int32_t targetBlockSize)
+                          int32_t numWorkerThreads, int32_t minTaskSize)
       : OpRewritePattern(ctx), asyncDispatch(asyncDispatch),
-        numWorkerThreads(numWorkerThreads), targetBlockSize(targetBlockSize) {}
+        numWorkerThreads(numWorkerThreads), minTaskSize(minTaskSize) {}
 
   LogicalResult matchAndRewrite(scf::ParallelOp op,
                                 PatternRewriter &rewriter) const override;
@@ -114,7 +114,7 @@
 private:
   bool asyncDispatch;
   int32_t numWorkerThreads;
-  int32_t targetBlockSize;
+  int32_t minTaskSize;
 };
 
 struct ParallelComputeFunctionType {
@@ -564,7 +564,7 @@
 // Dispatch parallel compute functions by submitting all async compute tasks
 // from a simple for loop in the caller thread.
 static void
-doSequantialDispatch(ImplicitLocOpBuilder &b, PatternRewriter &rewriter,
+doSequentialDispatch(ImplicitLocOpBuilder &b, PatternRewriter &rewriter,
                      ParallelComputeFunction &parallelComputeFunction,
                      scf::ParallelOp op, Value blockSize, Value blockCount,
                      const SmallVector<Value> &tripCounts) {
@@ -684,15 +684,15 @@
       std::max(1, static_cast<int>(numWorkerThreads * overshardingFactor)));
 
   // Target block size from the pass parameters.
-  Value targetComputeBlock = b.create<ConstantIndexOp>(targetBlockSize);
+  Value minTaskSizeCst = b.create<ConstantIndexOp>(minTaskSize);
 
   // Compute parallel block size from the parallel problem size:
   //   blockSize = min(tripCount,
   //                   max(ceil_div(tripCount, maxComputeBlocks),
-  //                       targetComputeBlock))
+  //                       ceil_div(minTaskSize, bodySize)))
   Value bs0 = b.create<SignedCeilDivIOp>(tripCount, maxComputeBlocks);
-  Value bs1 = b.create<CmpIOp>(CmpIPredicate::sge, bs0, targetComputeBlock);
-  Value bs2 = b.create<SelectOp>(bs1, bs0, targetComputeBlock);
+  Value bs1 = b.create<CmpIOp>(CmpIPredicate::sge, bs0, minTaskSizeCst);
+  Value bs2 = b.create<SelectOp>(bs1, bs0, minTaskSizeCst);
   Value bs3 = b.create<CmpIOp>(CmpIPredicate::sle, tripCount, bs2);
   Value blockSize0 = b.create<SelectOp>(bs3, tripCount, bs2);
   Value blockCount0 = b.create<SignedCeilDivIOp>(tripCount, blockSize0);
@@ -712,7 +712,7 @@
     doAsyncDispatch(b, rewriter, parallelComputeFunction, op, blockSize,
                     blockCount, tripCounts);
   } else {
-    doSequantialDispatch(b, rewriter, parallelComputeFunction, op, blockSize,
+    doSequentialDispatch(b, rewriter, parallelComputeFunction, op, blockSize,
                          blockCount, tripCounts);
   }
 
@@ -733,7 +733,7 @@
 
   RewritePatternSet patterns(ctx);
   patterns.add<AsyncParallelForRewrite>(ctx, asyncDispatch, numWorkerThreads,
-                                        targetBlockSize);
+                                        minTaskSize);
 
   if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns))))
     signalPassFailure();
@@ -743,9 +743,9 @@
   return std::make_unique<AsyncParallelForPass>();
 }
 
-std::unique_ptr<Pass>
-mlir::createAsyncParallelForPass(bool asyncDispatch, int32_t numWorkerThreads,
-                                 int32_t targetBlockSize) {
+std::unique_ptr<Pass> mlir::createAsyncParallelForPass(bool asyncDispatch,
+                                                       int32_t numWorkerThreads,
+                                                       int32_t minTaskSize) {
   return std::make_unique<AsyncParallelForPass>(asyncDispatch, numWorkerThreads,
-                                                targetBlockSize);
+                                                minTaskSize);
 }
diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
--- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
@@ -31,7 +31,7 @@
 
 // RUN:   mlir-opt %s -async-parallel-for="async-dispatch=false \
 // RUN:                                    num-workers=20 \
-// RUN:                                    target-block-size=1" \
+// RUN:                                    min-task-size=1" \
 // RUN:               -async-to-async-runtime \
 // RUN:               -async-runtime-ref-counting \
 // RUN:               -async-runtime-ref-counting-opt \
diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
--- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
@@ -29,7 +29,7 @@
 
 // RUN:   mlir-opt %s -async-parallel-for="async-dispatch=false \
 // RUN:                                    num-workers=20 \
-// RUN:                                    target-block-size=1" \
+// RUN:                                    min-task-size=1" \
 // RUN:               -async-to-async-runtime \
 // RUN:               -async-runtime-ref-counting \
 // RUN:               -async-runtime-ref-counting-opt \