diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -481,6 +481,12 @@
                                 spv::GroupNonUniformIMulOp>,
        &createGroupReduceOpImpl<spv::GroupFMulKHROp,
                                 spv::GroupNonUniformFMulOp>},
+      {ReduceType::MIN,
+       &createGroupReduceOpImpl<spv::GroupSMinOp, spv::GroupNonUniformSMinOp>,
+       &createGroupReduceOpImpl<spv::GroupFMinOp, spv::GroupNonUniformFMinOp>},
+      {ReduceType::MAX,
+       &createGroupReduceOpImpl<spv::GroupSMaxOp, spv::GroupNonUniformSMaxOp>,
+       &createGroupReduceOpImpl<spv::GroupFMaxOp, spv::GroupNonUniformFMaxOp>},
   };
 
   for (auto &handler : handlers)
diff --git a/mlir/test/Conversion/GPUToSPIRV/reductions.mlir b/mlir/test/Conversion/GPUToSPIRV/reductions.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/reductions.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/reductions.mlir
@@ -317,3 +317,323 @@
 }
 
 }
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupFMin <Workgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.all_reduce min %arg uniform {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFMin "Workgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.all_reduce min %arg {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupSMin <Workgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.all_reduce min %arg uniform {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformSMin "Workgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.all_reduce min %arg {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupFMin <Subgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce min %arg uniform : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFMin "Subgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce min %arg : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupSMin <Subgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce min %arg uniform : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformSMin "Subgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce min %arg : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupFMax <Workgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.all_reduce max %arg uniform {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFMax "Workgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.all_reduce max %arg {} : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupSMax <Workgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.all_reduce max %arg uniform {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformSMax "Workgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.all_reduce max %arg {} : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupFMax <Subgroup> <Reduce> %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce max %arg uniform : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: f32)
+  gpu.func @test(%arg : f32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformFMax "Subgroup" "Reduce" %[[ARG]] : f32
+    %reduced = gpu.subgroup_reduce max %arg : (f32) -> (f32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupUniformArithmeticKHR], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupSMax <Subgroup> <Reduce> %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce max %arg uniform : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}
+
+// -----
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.3, [Kernel, Addresses, Groups, GroupNonUniformArithmetic], []>, #spirv.resource_limits<>>
+} {
+
+gpu.module @kernels {
+  // CHECK-LABEL: spirv.func @test
+  // CHECK-SAME: (%[[ARG:.*]]: i32)
+  gpu.func @test(%arg : i32) kernel
+    attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+    // CHECK: %{{.*}} = spirv.GroupNonUniformSMax "Subgroup" "Reduce" %[[ARG]] : i32
+    %reduced = gpu.subgroup_reduce max %arg : (i32) -> (i32)
+    gpu.return
+  }
+}
+
+}