diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td
--- a/mlir/include/mlir/Dialect/GPU/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Passes.td
@@ -19,6 +19,7 @@
 def GpuAsyncRegionPass : FunctionPass<"gpu-async-region"> {
   let summary = "Make GPU ops async";
   let constructor = "mlir::createGpuAsyncRegionPass()";
+  let dependentDialects = ["async::AsyncDialect"]; // Needs the Async dialect.
 }
 
 #endif // MLIR_DIALECT_GPU_PASSES
diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
@@ -78,6 +78,8 @@
     if (op->getNumRegions() > 0)
       return op->emitOpError("regions are not supported");
 
+    auto tokenType = builder.getType<gpu::AsyncTokenType>();
+
     // If there is no current token, insert a `gpu.wait async` without
     // dependencies to create one.
     if (!currentToken)
@@ -108,7 +110,7 @@
   }
 
   OpBuilder builder;
-  const Type tokenType = builder.getType<gpu::AsyncTokenType>();
+
-  // The token that represents the current asynchronous dependency. It's valid
+  // The token that represents the current asynchronous dependency. Its valid
   // range starts with a `gpu.wait async` op, and ends with a `gpu.wait` op.
   // In between, each gpu::AsyncOpInterface depends on the current token and
diff --git a/mlir/lib/Dialect/GPU/Transforms/PassDetail.h b/mlir/lib/Dialect/GPU/Transforms/PassDetail.h
--- a/mlir/lib/Dialect/GPU/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/GPU/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_GPU_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_GPU_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Async/IR/Async.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {