// Patch summary (preamble text placed before the first `diff --git` header):
//
//   1. mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
//      Adds one more populateOpPatterns(...) call, directly after the existing
//      "__ocml_tan_f32" / "__ocml_tan_f64" one, passing the function names
//      "__ocml_erf_f32" and "__ocml_erf_f64".
//
//   2. mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
//      Adds a new `// -----`-separated test case, @gpu_erf, which applies
//      math.erf to an f32 and an f64 argument. Its CHECK lines expect
//      llvm.func declarations of @__ocml_erf_f32 (f32 -> f32) and
//      @__ocml_erf_f64 (f64 -> f64), and an llvm.call to each of them.
//
// NOTE(review): this copy of the patch has all of its line breaks collapsed
// onto a single line, so it is not in applicable unified-diff form as-is.
// NOTE(review): the populateOpPatterns(...) calls appear without an explicit
// template argument and the context line `std::unique_ptr>` has an unmatched
// `>`; angle-bracketed text was presumably lost during extraction. Confirm
// against the upstream file before applying.
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -280,6 +280,8 @@ "__ocml_tanh_f64"); populateOpPatterns(converter, patterns, "__ocml_tan_f32", "__ocml_tan_f64"); + populateOpPatterns(converter, patterns, "__ocml_erf_f32", + "__ocml_erf_f64"); } std::unique_ptr> diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -437,6 +437,21 @@ // ----- +gpu.module @test_module { + // CHECK: llvm.func @__ocml_erf_f32(f32) -> f32 + // CHECK: llvm.func @__ocml_erf_f64(f64) -> f64 + // CHECK-LABEL: func @gpu_erf + func.func @gpu_erf(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.erf %arg_f32 : f32 + // CHECK: llvm.call @__ocml_erf_f32(%{{.*}}) : (f32) -> f32 + %result64 = math.erf %arg_f64 : f64 + // CHECK: llvm.call @__ocml_erf_f64(%{{.*}}) : (f64) -> f64 + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + gpu.module @test_module { // CHECK-LABEL: func @gpu_unroll func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {