Index: mlir/include/mlir/Dialect/Linalg/Passes.h
===================================================================
--- mlir/include/mlir/Dialect/Linalg/Passes.h
+++ mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -31,7 +31,6 @@
 std::unique_ptr<Pass> createLinalgFoldUnitExtentDimsPass();
 
 std::unique_ptr<Pass> createLinalgElementwiseOpFusionPass();
-std::unique_ptr<Pass> createFoldReshapeOpsByLinearizationPass();
 
 std::unique_ptr<Pass> createLinalgNamedOpConversionPass();
 
Index: mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
===================================================================
--- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -254,6 +254,10 @@
                        StringAttr::get(&converter.getContext(),
                                        NVVM::NVVMDialect::getKernelFuncAttrName()));
 
+  patterns.add<OpToFuncCallLowering<arith::MaxFOp>>(converter, "__nv_fmaxf",
+                                                    "__nv_fmax");
+  patterns.add<OpToFuncCallLowering<arith::MinFOp>>(converter, "__nv_fminf",
+                                                    "__nv_fmin");
   patterns.add<OpToFuncCallLowering<math::AbsOp>>(converter, "__nv_fabsf",
                                                   "__nv_fabs");
   patterns.add<OpToFuncCallLowering<math::AtanOp>>(converter, "__nv_atanf",
Index: mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
===================================================================
--- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -166,6 +166,36 @@
 // -----
 
 gpu.module @test_module {
+  // CHECK: llvm.func @__nv_fmaxf(f32, f32) -> f32
+  // CHECK: llvm.func @__nv_fmax(f64, f64) -> f64
+  // CHECK-LABEL: func @gpu_fmax
+  func.func @gpu_fmax(%arg0_f32 : f32, %arg1_f32 : f32, %arg0_f64 : f64, %arg1_f64 : f64) -> (f32, f64) {
+    %result32 = arith.maxf %arg0_f32, %arg1_f32 : f32
+    // CHECK: llvm.call @__nv_fmaxf(%{{.*}}) : (f32, f32) -> f32
+    %result64 = arith.maxf %arg0_f64, %arg1_f64 : f64
+    // CHECK: llvm.call @__nv_fmax(%{{.*}}) : (f64, f64) -> f64
+    func.return %result32, %result64 : f32, f64
+  }
+}
+
+// -----
+
+gpu.module @test_module {
+  // CHECK: llvm.func @__nv_fminf(f32, f32) -> f32
+  // CHECK: llvm.func @__nv_fmin(f64, f64) -> f64
+  // CHECK-LABEL: func @gpu_fmin
+  func.func @gpu_fmin(%arg0_f32 : f32, %arg1_f32 : f32, %arg0_f64 : f64, %arg1_f64 : f64) -> (f32, f64) {
+    %result32 = arith.minf %arg0_f32, %arg1_f32 : f32
+    // CHECK: llvm.call @__nv_fminf(%{{.*}}) : (f32, f32) -> f32
+    %result64 = arith.minf %arg0_f64, %arg1_f64 : f64
+    // CHECK: llvm.call @__nv_fmin(%{{.*}}) : (f64, f64) -> f64
+    func.return %result32, %result64 : f32, f64
+  }
+}
+
+// -----
+
+gpu.module @test_module {
   // CHECK: llvm.func @__nv_fabsf(f32) -> f32
   // CHECK: llvm.func @__nv_fabs(f64) -> f64
   // CHECK-LABEL: func @gpu_fabs