diff --git a/mlir/include/mlir/Dialect/GPU/Utils.h b/mlir/include/mlir/Dialect/GPU/Utils.h
--- a/mlir/include/mlir/Dialect/GPU/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/Utils.h
@@ -38,7 +38,13 @@
 /// Sink operations into the `launchOp` to reduce the number of values that are
 /// used within the region of the operation, but defined outside of the
 /// region.
-LogicalResult sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp);
+/// The optional `isSinkingBeneficiary` callback identifies the operations that
+/// are beneficial to sink; since sunk operations are duplicated, it should only
+/// accept operations without side-effects. When null (the default), a built-in
+/// predicate is used.
+LogicalResult sinkOperationsIntoLaunchOp(
+    gpu::LaunchOp launchOp,
+    llvm::function_ref<bool(Operation *)> isSinkingBeneficiary = nullptr);
 
 } // namespace mlir
 #endif // MLIR_DIALECT_GPU_UTILS_H_
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -59,7 +59,7 @@
 /// Identifies operations that are beneficial to sink into kernels. These
 /// operations may not have side-effects, as otherwise sinking (and hence
 /// duplicating them) is not legal.
-static bool isSinkingBeneficiary(Operation *op) {
+static bool isSinkingBeneficiaryDefault(Operation *op) { // no-callback fallback
   return isa<arith::ConstantOp, ConstantOp, memref::DimOp, arith::SelectOp,
              arith::CmpIOp>(op);
 }
@@ -75,11 +75,11 @@
 /// the order they should appear in the kernel. Furthermore, `availableValues`
 /// is updated with results that will be available after sinking the identified
 /// ops.
-static bool
-extractBeneficiaryOps(Operation *op,
-                      const SetVector<Value> &existingDependencies,
-                      SetVector<Operation *> &beneficiaryOps,
-                      llvm::SmallPtrSetImpl<Value> &availableValues) {
+static bool extractBeneficiaryOps(
+    Operation *op, const SetVector<Value> &existingDependencies,
+    SetVector<Operation *> &beneficiaryOps,
+    llvm::SmallPtrSetImpl<Value> &availableValues,
+    llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
   if (beneficiaryOps.count(op))
     return true;
 
@@ -93,9 +93,9 @@
     // Else check whether it can be made available via sinking or already is a
     // dependency.
     Operation *definingOp = operand.getDefiningOp();
-    if ((!definingOp ||
-         !extractBeneficiaryOps(definingOp, existingDependencies,
-                                beneficiaryOps, availableValues)) &&
+    if ((!definingOp || !extractBeneficiaryOps(definingOp, existingDependencies,
+                                               beneficiaryOps, availableValues,
+                                               isSinkingBeneficiary)) &&
         !existingDependencies.count(operand))
       return false;
   }
@@ -106,7 +106,9 @@
   return true;
 }
 
-LogicalResult mlir::sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp) {
+LogicalResult mlir::sinkOperationsIntoLaunchOp(
+    gpu::LaunchOp launchOp,
+    llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
   Region &launchOpBody = launchOp.body();
 
   // Identify uses from values defined outside of the scope of the launch
@@ -120,7 +122,9 @@
     Operation *operandOp = operand.getDefiningOp();
     if (!operandOp)
       continue;
-    extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues);
+    extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues,
+                          isSinkingBeneficiary ? isSinkingBeneficiary
+                                               : isSinkingBeneficiaryDefault);
   }
 
   // Insert operations so that the defs get cloned before uses.
diff --git a/mlir/unittests/Dialect/CMakeLists.txt b/mlir/unittests/Dialect/CMakeLists.txt
--- a/mlir/unittests/Dialect/CMakeLists.txt
+++ b/mlir/unittests/Dialect/CMakeLists.txt
@@ -7,6 +7,7 @@
   MLIRDialect)
 
 add_subdirectory(Affine)
+add_subdirectory(GPU)
 add_subdirectory(Quant)
 add_subdirectory(SparseTensor)
 add_subdirectory(SPIRV)
diff --git a/mlir/unittests/Dialect/GPU/CMakeLists.txt b/mlir/unittests/Dialect/GPU/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/unittests/Dialect/GPU/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Unit tests for the GPU dialect utilities.
+add_mlir_unittest(MLIRGPUTests
+  KernelOutliningTest.cpp
+)
+target_link_libraries(MLIRGPUTests
+  PRIVATE
+  MLIRIR
+  MLIRGPUOps
+  MLIRGPUTransforms
+  MLIRParser
+)
diff --git a/mlir/unittests/Dialect/GPU/KernelOutliningTest.cpp b/mlir/unittests/Dialect/GPU/KernelOutliningTest.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/unittests/Dialect/GPU/KernelOutliningTest.cpp
@@ -0,0 +1,65 @@
+//===- KernelOutliningTest.cpp - GPU kernel outlining utilities tests -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/GPU/Utils.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Parser.h"
+#include "gtest/gtest.h"
+
+using namespace mlir;
+
+namespace {
+/// Checks that `sinkOperationsIntoLaunchOp` honors a caller-provided predicate:
+/// of the two outside values used in the launch body, only the one defined by
+/// the accepted op ("test.foo") is sunk into the region.
+TEST(GPUKernelOutliningTest, IsSinkingBeneficiary) {
+  MLIRContext context;
+  context.loadDialect<gpu::GPUDialect>();
+  // The "test.*" ops in the IR below are not part of the loaded GPU dialect.
+  context.allowUnregisteredDialects();
+
+  const char *const code = R"mlir(
+    %0 = "test.src"() : () -> (index)
+    %1 = "test.foo"() : () -> (i32)
+    %2 = "test.bar"() : () -> (i32)
+    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %0, %grid_y = %0, %grid_z = %0)
+               threads(%tx, %ty, %tz) in (%block_x = %0, %block_y = %0, %block_z = %0) {
+      "test.baz"(%1, %2) : (i32, i32) -> ()
+      gpu.terminator
+    }
+  )mlir";
+
+  OwningOpRef<ModuleOp> module = mlir::parseSourceString(code, &context);
+  ASSERT_TRUE(module);
+
+  auto ops = module.get().body().getOps<gpu::LaunchOp>();
+  ASSERT_TRUE(llvm::hasSingleElement(ops));
+
+  auto launch = *ops.begin();
+
+  // Custom predicate: only "test.foo" is considered beneficial to sink.
+  auto isSinkingBeneficiary = [&](Operation *op) -> bool {
+    return op->getName() == OperationName("test.foo", &context);
+  };
+
+  ASSERT_TRUE(
+      succeeded(sinkOperationsIntoLaunchOp(launch, isSinkingBeneficiary)));
+
+  // Expect exactly the sunk "test.foo", the original "test.baz" and the
+  // terminator; "test.bar" must not have been sunk. The unsigned literal
+  // avoids a -Wsign-compare warning against the unsigned size().
+  auto &launchOps = launch.body().front().getOperations();
+  ASSERT_EQ(launchOps.size(), 3u);
+  auto it = launchOps.begin();
+
+  EXPECT_EQ(std::next(it, 0)->getName(), OperationName("test.foo", &context));
+  EXPECT_EQ(std::next(it, 1)->getName(), OperationName("test.baz", &context));
+  EXPECT_TRUE(isa<gpu::TerminatorOp>(*std::next(it, 2)));
+}
+} // namespace