diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
@@ -48,9 +48,10 @@
 /// Sparsification options.
 struct SparsificationOptions {
   SparsificationOptions(SparseParallelizationStrategy p,
-                        SparseVectorizationStrategy v, unsigned vl, bool e)
+                        SparseVectorizationStrategy v, unsigned vl, bool e,
+                        bool vla = false)
       : parallelizationStrategy(p), vectorizationStrategy(v), vectorLength(vl),
-        enableSIMDIndex32(e) {}
+        enableSIMDIndex32(e), enableVLAVectorization(vla) {}
   SparsificationOptions()
       : SparsificationOptions(SparseParallelizationStrategy::kNone,
                               SparseVectorizationStrategy::kNone, 1u, false) {}
@@ -58,6 +59,7 @@
   SparseVectorizationStrategy vectorizationStrategy;
   unsigned vectorLength;
   bool enableSIMDIndex32;
+  bool enableVLAVectorization;
 };
 
 /// Sets up sparsification rewriting rules with the given options.
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -70,7 +70,9 @@
     Option<"vectorLength", "vl", "int32_t", "1",
            "Set the vector length">,
     Option<"enableSIMDIndex32", "enable-simd-index32", "bool", "false",
-           "Enable i32 indexing into vectors (for efficiency)">
+           "Enable i32 indexing into vectors (for efficiency)">,
+    Option<"enableVLAVectorization", "enable-vla-vectorization", "bool",
+           "false", "Enable vector length agnostic vectorization">
   ];
 }
 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp
@@ -68,7 +68,8 @@
     RewritePatternSet patterns(ctx);
     // Translate strategy flags to strategy options.
     SparsificationOptions options(parallelOption(), vectorOption(),
-                                  vectorLength, enableSIMDIndex32);
+                                  vectorLength, enableSIMDIndex32,
+                                  enableVLAVectorization);
     // Apply rewriting.
     populateSparsificationPatterns(patterns, options);
     vector::populateVectorToVectorCanonicalizationPatterns(patterns);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -56,7 +56,7 @@
         idxs(numTensors, std::vector<Value>(numLoops)), redExp(-1u), redVal(),
         redKind(kNoReduc), sparseOut(op), outerParNest(nest), lexIdx(),
         expValues(), expFilled(), expAdded(), expCount(), curVecLength(1),
-        curVecMask() {}
+        curVecMask(), generateVLA(false) {}
   /// Sparsification options.
   SparsificationOptions options;
   /// Universal dense indices and upper bounds (by index). The loops array
@@ -95,6 +95,8 @@
   // Current vector length and mask.
   unsigned curVecLength;
   Value curVecMask;
+  // Whether to generate vector code in a vector length agnostic (VLA) way.
+  bool generateVLA;
 };
 
 } // namespace
@@ -554,7 +556,8 @@
 
 /// Constructs vector type.
 static VectorType vectorType(CodeGen &codegen, Type etp) {
-  return VectorType::get(codegen.curVecLength, etp);
+  return VectorType::get(codegen.curVecLength, etp,
+                         /*numScalableDims=*/codegen.generateVLA ? 1 : 0);
 }
 
 /// Constructs vector type from pointer.
@@ -1119,14 +1122,24 @@
       isParallelFor(codegen, isOuter, isReduction, isSparse, isVector);
 
   // Prepare vector length.
-  if (isVector)
+  if (isVector) {
     codegen.curVecLength = codegen.options.vectorLength;
+    codegen.generateVLA = codegen.options.enableVLAVectorization;
+  }
 
   // Loop bounds and increment.
   Location loc = op.getLoc();
   Value lo = isSparse ? codegen.pidxs[tensor][idx] : codegen.loops[idx];
   Value hi = isSparse ? codegen.highs[tensor][idx] : codegen.sizes[idx];
   Value step = constantIndex(rewriter, loc, codegen.curVecLength);
+  // Scale the step by vscale only for loops that are actually vectorized.
+  // This change never clears generateVLA, so it may still be set from an
+  // earlier vector loop when a later scalar loop reaches this point.
+  if (isVector && codegen.generateVLA) {
+    Value vscale = rewriter.create<vector::VectorScaleOp>(
+        loc, IndexType::get(rewriter.getContext()));
+    step = rewriter.create<arith::MulIOp>(loc, vscale, step);
+  }
 
   // Emit a parallel loop.
   if (isParallel) {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
@@ -6,6 +6,8 @@
 // RUN:   FileCheck %s --check-prefix=CHECK-VEC2
 // RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=16 enable-simd-index32=true" -cse -split-input-file | \
 // RUN:   FileCheck %s --check-prefix=CHECK-VEC3
+// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=4 enable-vla-vectorization=true" -cse -split-input-file | \
+// RUN:   FileCheck %s --check-prefix=CHECK-VEC4
 
 #DenseVector = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>
 
@@ -54,6 +56,24 @@
 // CHECK-VEC2:       }
 // CHECK-VEC2:       return
 //
+// CHECK-VEC4:       #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)
+// CHECK-VEC4-LABEL: func @scale_d
+// CHECK-VEC4-DAG:   %[[c0:.*]] = arith.constant 0 : index
+// CHECK-VEC4-DAG:   %[[c4:.*]] = arith.constant 4 : index
+// CHECK-VEC4-DAG:   %[[c1024:.*]] = arith.constant 1024 : index
+// CHECK-VEC4-DAG:   %[[passthru:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32>
+// CHECK-VEC4-DAG:   %[[vscale:.*]] = vector.vscale
+// CHECK-VEC4:       %[[step:.*]] = arith.muli %[[vscale]], %[[c4]] : index
+// CHECK-VEC4:       scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[step]] {
+// CHECK-VEC4:         %[[sub:.*]] = affine.min #[[$map]](%[[c1024]], %[[i]])[%[[step]]]
+// CHECK-VEC4:         %[[mask:.*]] = vector.create_mask %[[sub]] : vector<[4]xi1>
+// CHECK-VEC4:         %[[val:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[passthru]] : memref<?xf32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
+// CHECK-VEC4:         %[[scalev:.*]] = vector.broadcast %{{.*}} : f32 to vector<[4]xf32>
+// CHECK-VEC4:         %[[scaled:.*]] = arith.mulf %[[val]], %[[scalev]] : vector<[4]xf32>
+// CHECK-VEC4:         vector.maskedstore %{{.*}}[%[[i]]], %[[mask]], %[[scaled]] : memref<1024xf32>, vector<[4]xi1>, vector<[4]xf32>
+// CHECK-VEC4:       }
+// CHECK-VEC4:       return
+//
 func @scale_d(%arga: tensor<1024xf32, #DenseVector>, %b: f32, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
   %0 = linalg.generic #trait_scale_d
     ins(%arga: tensor<1024xf32, #DenseVector>)
@@ -169,6 +189,33 @@
 // CHECK-VEC3:       }
 // CHECK-VEC3:       return
 //
+// CHECK-VEC4:       #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)
+// CHECK-VEC4-LABEL: func @mul_s
+// CHECK-VEC4-DAG:   %[[c0:.*]] = arith.constant 0 : index
+// CHECK-VEC4-DAG:   %[[c1:.*]] = arith.constant 1 : index
+// CHECK-VEC4-DAG:   %[[c4:.*]] = arith.constant 4 : index
+// CHECK-VEC4-DAG:   %[[passthrui32:.*]] = arith.constant dense<0> : vector<[4]xi32>
+// CHECK-VEC4-DAG:   %[[passthruf32:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32>
+// CHECK-VEC4:       %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref<?xi32>
+// CHECK-VEC4:       %[[a:.*]] = arith.extui %[[p]] : i32 to i64
+// CHECK-VEC4:       %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
+// CHECK-VEC4:       %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref<?xi32>
+// CHECK-VEC4:       %[[b:.*]] = arith.extui %[[r]] : i32 to i64
+// CHECK-VEC4:       %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
+// CHECK-VEC4:       %[[vscale:.*]] = vector.vscale
+// CHECK-VEC4:       %[[step:.*]] = arith.muli %[[vscale]], %[[c4]] : index
+// CHECK-VEC4:       scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[step]] {
+// CHECK-VEC4:         %[[sub:.*]] = affine.min #[[$map]](%[[s]], %[[i]])[%[[step]]]
+// CHECK-VEC4:         %[[mask:.*]] = vector.create_mask %[[sub]] : vector<[4]xi1>
+// CHECK-VEC4:         %[[li:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[passthrui32]] : memref<?xi32>, vector<[4]xi1>, vector<[4]xi32> into vector<[4]xi32>
+// CHECK-VEC4:         %[[lii64:.*]] = arith.extui %[[li]] : vector<[4]xi32> to vector<[4]xi64>
+// CHECK-VEC4:         %[[la:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[passthruf32]] : memref<?xf32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
+// CHECK-VEC4:         %[[lb:.*]] = vector.gather %{{.*}}[%[[c0]]] [%[[lii64]]], %[[mask]], %[[passthruf32]] : memref<1024xf32>, vector<[4]xi64>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
+// CHECK-VEC4:         %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<[4]xf32>
+// CHECK-VEC4:         vector.scatter %{{.*}}[%[[c0]]] [%[[lii64]]], %[[mask]], %[[m]] : memref<1024xf32>, vector<[4]xi64>, vector<[4]xi1>, vector<[4]xf32>
+// CHECK-VEC4:       }
+// CHECK-VEC4:       return
+//
 func @mul_s(%arga: tensor<1024xf32, #SparseVector>, %argb: tensor<1024xf32>, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
   %0 = linalg.generic #trait_mul_s
     ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>)