Update the mlir-rocm-runner tests so that `dim` receives its dimension index
as an SSA operand (%c0) instead of an integer literal, and rename the index
constants to %c0 / %c1 (and %cst2 to %block_dim) accordingly.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed and whose dynamic memref shapes had been
stripped to a bare `memref`. The `<?xf32>` / `<?xi32>` shapes below were
reconstructed from the surrounding loads, stores and casts, and the
indentation follows the usual 2-space MLIR test layout. Confirm both against
the target files before applying (or apply with whitespace ignored).

diff --git a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir b/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
--- a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
+++ b/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
@@ -1,10 +1,11 @@
 // RUN: mlir-rocm-runner %s --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s
 
 func @other_func(%arg0 : f32, %arg1 : memref<?xf32>) {
-  %cst = constant 1 : index
-  %cst2 = dim %arg1, 0 : memref<?xf32>
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
-             threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %block_dim = dim %arg1, %c0 : memref<?xf32>
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
     store %arg0, %arg1[%tx] : memref<?xf32>
     gpu.terminator
   }
diff --git a/mlir/test/mlir-rocm-runner/two-modules.mlir b/mlir/test/mlir-rocm-runner/two-modules.mlir
--- a/mlir/test/mlir-rocm-runner/two-modules.mlir
+++ b/mlir/test/mlir-rocm-runner/two-modules.mlir
@@ -4,19 +4,20 @@
 func @main() {
   %arg = alloc() : memref<13xi32>
   %dst = memref_cast %arg : memref<13xi32> to memref<?xi32>
-  %one = constant 1 : index
-  %sx = dim %dst, 0 : memref<?xi32>
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %sx = dim %dst, %c0 : memref<?xi32>
   %cast_dst = memref_cast %dst : memref<?xi32> to memref<*xi32>
   call @mgpuMemHostRegisterInt32(%cast_dst) : (memref<*xi32>) -> ()
   %dst_device = call @mgpuMemGetDeviceMemRef1dInt32(%dst) : (memref<?xi32>) -> (memref<?xi32>)
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
-             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
     %t0 = index_cast %tx : index to i32
     store %t0, %dst_device[%tx] : memref<?xi32>
     gpu.terminator
   }
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
-             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
     %t0 = index_cast %tx : index to i32
     store %t0, %dst_device[%tx] : memref<?xi32>
     gpu.terminator
diff --git a/mlir/test/mlir-rocm-runner/vecadd.mlir b/mlir/test/mlir-rocm-runner/vecadd.mlir
--- a/mlir/test/mlir-rocm-runner/vecadd.mlir
+++ b/mlir/test/mlir-rocm-runner/vecadd.mlir
@@ -1,10 +1,11 @@
 // RUN: mlir-rocm-runner %s --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s
 
 func @vecadd(%arg0 : memref<?xf32>, %arg1 : memref<?xf32>, %arg2 : memref<?xf32>) {
-  %cst = constant 1 : index
-  %cst2 = dim %arg0, 0 : memref<?xf32>
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst)
-             threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %block_dim = dim %arg0, %c0 : memref<?xf32>
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+             threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
     %a = load %arg0[%tx] : memref<?xf32>
     %b = load %arg1[%tx] : memref<?xf32>
     %c = addf %a, %b : f32