diff --git a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir b/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
--- a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
+++ b/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
@@ -1,10 +1,11 @@
 // RUN: mlir-rocm-runner %s --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s
 
 func @other_func(%arg0 : f32, %arg1 : memref<?xf32>) {
-  %cst = constant 1 : index
-  %cst2 = dim %arg1, 0 : memref<?xf32>
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
-             threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %block_dim = dim %arg1, %c0 : memref<?xf32>
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
     store %arg0, %arg1[%tx] : memref<?xf32>
     gpu.terminator
   }
diff --git a/mlir/test/mlir-rocm-runner/two-modules.mlir b/mlir/test/mlir-rocm-runner/two-modules.mlir
--- a/mlir/test/mlir-rocm-runner/two-modules.mlir
+++ b/mlir/test/mlir-rocm-runner/two-modules.mlir
@@ -4,19 +4,20 @@
 func @main() {
   %arg = alloc() : memref<13xi32>
   %dst = memref_cast %arg : memref<13xi32> to memref<?xi32>
-  %one = constant 1 : index
-  %sx = dim %dst, 0 : memref<?xi32>
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %sx = dim %dst, %c0 : memref<?xi32>
   %cast_dst = memref_cast %dst : memref<?xi32> to memref<*xi32>
   call @mgpuMemHostRegisterInt32(%cast_dst) : (memref<*xi32>) -> ()
   %dst_device = call @mgpuMemGetDeviceMemRef1dInt32(%dst) : (memref<?xi32>) -> (memref<?xi32>)
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
-             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
     %t0 = index_cast %tx : index to i32
     store %t0, %dst_device[%tx] : memref<?xi32>
     gpu.terminator
   }
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
-             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
     %t0 = index_cast %tx : index to i32
     store %t0, %dst_device[%tx] : memref<?xi32>
     gpu.terminator
diff --git a/mlir/test/mlir-rocm-runner/vecadd.mlir b/mlir/test/mlir-rocm-runner/vecadd.mlir
--- a/mlir/test/mlir-rocm-runner/vecadd.mlir
+++ b/mlir/test/mlir-rocm-runner/vecadd.mlir
@@ -1,10 +1,11 @@
 // RUN: mlir-rocm-runner %s --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s
 
 func @vecadd(%arg0 : memref<?xf32>, %arg1 : memref<?xf32>, %arg2 : memref<?xf32>) {
-  %cst = constant 1 : index
-  %cst2 = dim %arg0, 0 : memref<?xf32>
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
-             threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %block_dim = dim %arg0, %c0 : memref<?xf32>
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
     %a = load %arg0[%tx] : memref<?xf32>
     %b = load %arg1[%tx] : memref<?xf32>
     %c = addf %a, %b : f32