diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-gpu)" -canonicalize | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-gpu),canonicalize" | FileCheck %s #map0 = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1, d2) -> (d0, d2)> diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -test-gpu-memory-promotion -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s gpu.module @foo { diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir --- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" -canonicalize |\ // RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\ // RUN: mlir-opt -lower-affine -convert-scf-to-cf -convert-vector-to-llvm \ -// RUN: -convert-arith-to-llvm -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -convert-arith-to-llvm -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir --- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir @@ -2,9 +2,9 @@ // everything on the same thread. // RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ @@ -15,9 +15,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write" \ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ @@ -27,9 +27,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" \ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ -// RUN: --convert-scf-to-cf -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ +// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ -// RUN: --convert-scf-to-cf -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ +// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir --- a/mlir/test/Integration/GPU/CUDA/async.mlir +++ b/mlir/test/Integration/GPU/CUDA/async.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-async-region -gpu-to-llvm \ -// RUN: -async-to-async-runtime -async-runtime-ref-counting \ -// RUN: -convert-async-to-llvm -convert-func-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-async-region -gpu-to-llvm \ +// RUN: | mlir-opt -async-to-async-runtime -async-runtime-ref-counting \ +// RUN: | mlir-opt -convert-async-to-llvm -convert-func-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_async_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir --- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir +++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir --- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir --- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir --- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir --- a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir +++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir --- a/mlir/test/Integration/GPU/ROCM/printf.mlir +++ b/mlir/test/Integration/GPU/ROCM/printf.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir --- a/mlir/test/Integration/GPU/ROCM/two-modules.mlir +++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir --- a/mlir/test/Integration/GPU/ROCM/vecadd.mlir +++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s \ -// RUN: -convert-scf-to-cf \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm=use-bare-pointers-for-kernels=true \ +// RUN: | mlir-opt -convert-scf-to-cf \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm=use-bare-pointers-for-kernels=true \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir --- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir +++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s \ -// RUN: -convert-scf-to-cf \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -convert-scf-to-cf \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir --- a/mlir/test/Pass/pipeline-parsing.mlir +++ b/mlir/test/Pass/pipeline-parsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass)' -pass-pipeline="func.func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s +// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass),func.func(cse,canonicalize)' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s // RUN: mlir-opt %s -mlir-disable-threading -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK // RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass),any(test-interface-pass),any(test-interface-pass),func.func(test-function-pass),any(canonicalize),func.func(cse)' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=GENERIC_MERGE_CHECK // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s diff --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir --- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s // Verify bare pointer memref calling convention. `simple_add1_add2_test` // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second