diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -746,6 +746,8 @@ "__nv_cos"); patterns.insert>(converter, "__nv_expf", "__nv_exp"); + patterns.insert>(converter, "__nv_tanhf", + "__nv_tanh"); } std::unique_ptr> diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -58,6 +58,8 @@ "_ocml_cos_f64"); patterns.insert>(converter, "_ocml_exp_f32", "_ocml_exp_f64"); + patterns.insert>(converter, "_ocml_tanh_f32", + "_ocml_tanh_f64"); ConversionTarget target(getContext()); target.addLegalDialect(); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -157,6 +157,20 @@ // ----- gpu.module @test_module { + // CHECK: llvm.func @__nv_tanhf(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__nv_tanh(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_tanh + func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.tanh %arg_f32 : f32 + // CHECK: llvm.call @__nv_tanhf(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.tanh %arg_f64 : f64 + // CHECK: llvm.call @__nv_tanh(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- +gpu.module @test_module { // CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float // CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_exp diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -84,6 +84,20 @@ // ----- gpu.module @kernel_module { + // CHECK: llvm.func @_ocml_tanh_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @_ocml_tanh_f64(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_tanh + func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) { + %result32 = std.tanh %arg_f32 : f32 + // CHECK: llvm.call @_ocml_tanh_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.tanh %arg_f64 : f64 + // CHECK: llvm.call @_ocml_tanh_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return + } +} + +// ----- +gpu.module @kernel_module { // CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_exp