diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -83,6 +83,8 @@
     libc.src.errno.errno
 
     # stdio.h entrypoints
+    libc.src.stdio.printf
+    libc.src.stdio.fprintf
     libc.src.stdio.snprintf
     libc.src.stdio.vsnprintf
     libc.src.stdio.puts
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -23,6 +23,9 @@
   RPC_FREE = 10,
   RPC_HOST_CALL = 11,
   RPC_ABORT = 12,
+  RPC_PRINTF_TO_STDOUT = 13,
+  RPC_PRINTF_TO_STDERR = 14,
+  RPC_PRINTF_TO_STREAM = 15
 } rpc_opcode_t;
 
 #endif // __LLVM_LIBC_TYPES_RPC_OPCODE_H__
diff --git a/libc/src/__support/arg_list.h b/libc/src/__support/arg_list.h
--- a/libc/src/__support/arg_list.h
+++ b/libc/src/__support/arg_list.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIBC_SRC_SUPPORT_ARG_LIST_H
 #define LLVM_LIBC_SRC_SUPPORT_ARG_LIST_H
 
+#include "src/__support/CPP/type_traits.h"
 #include "src/__support/common.h"
 
 #include <stdarg.h>
@@ -60,6 +61,35 @@
   size_t read_count() const { return arg_counter; }
 };
 
+// Used for the GPU implementation of `printf`. This models a variadic list as a
+// simple array of pointers that are built manually by the implementation.
+class ArrayArgList {
+  size_t idx;
+  void **list;
+
+public:
+  LIBC_INLINE ArrayArgList(void **list) : idx(0), list(list) {}
+  // The va_list is ignored; initialize to an empty list.
+  LIBC_INLINE ArrayArgList(va_list) : idx(0), list(nullptr) {}
+  LIBC_INLINE ArrayArgList(const ArrayArgList &other)
+      : idx(other.idx), list(other.list) {}
+  LIBC_INLINE ~ArrayArgList() = default;
+
+  LIBC_INLINE ArrayArgList &operator=(const ArrayArgList &rhs) {
+    idx = rhs.idx;
+    list = rhs.list;
+    return *this;
+  }
+
+  // A 'char *' is the slot itself; other types are read through the slot.
+  template <class T> LIBC_INLINE T next_var() {
+    if constexpr (cpp::is_same_v<T, char *>)
+      return reinterpret_cast<T>(list[idx++]);
+    else
+      return *reinterpret_cast<T *>(list[idx++]);
+  }
+};
+
 } // namespace internal
 } // namespace __llvm_libc
 
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -422,45 +422,6 @@
     libc.src.stdio.printf_core.writer
 )
 
-list(APPEND printf_deps
-      libc.src.__support.arg_list
-      libc.src.stdio.printf_core.vfprintf_internal
-)
-if(LLVM_LIBC_FULL_BUILD)
-  list(APPEND printf_deps
-      libc.src.__support.File.file
-      libc.src.__support.File.platform_file
-      libc.src.__support.File.platform_stdout
-  )
-else()
-  list(APPEND printf_copts "-DLIBC_COPT_PRINTF_USE_SYSTEM_FILE")
-endif()
-
-add_entrypoint_object(
-  printf
-  SRCS
-    printf.cpp
-  HDRS
-    printf.h
-  DEPENDS
-    ${printf_deps}
-  COMPILE_OPTIONS
-    ${printf_copts}
-)
-
-add_entrypoint_object(
-  fprintf
-  SRCS
-    fprintf.cpp
-  HDRS
-    fprintf.h
-  DEPENDS
-    libc.src.__support.arg_list
-    libc.src.stdio.printf_core.vfprintf_internal
-  COMPILE_OPTIONS
-    ${printf_copts}
-)
-
 add_entrypoint_object(
   vsprintf
   SRCS
@@ -483,31 +444,6 @@
     libc.src.stdio.printf_core.writer
 )
 
-add_entrypoint_object(
-  vprintf
-  SRCS
-    vprintf.cpp
-  HDRS
-    vprintf.h
-  DEPENDS
-    ${printf_deps}
-  COMPILE_OPTIONS
-    ${printf_copts}
-)
-
-add_entrypoint_object(
-  vfprintf
-  SRCS
-    vfprintf.cpp
-  HDRS
-    vfprintf.h
-  DEPENDS
-    libc.src.__support.arg_list
-    libc.src.stdio.printf_core.vfprintf_internal
-  COMPILE_OPTIONS
-    ${printf_copts}
-)
-
 add_entrypoint_object(
   ftell
   SRCS
@@ -538,3 +474,7 @@
 add_stdio_entrypoint_object(stdin)
 add_stdio_entrypoint_object(stdout)
 add_stdio_entrypoint_object(stderr)
+add_stdio_entrypoint_object(printf)
+add_stdio_entrypoint_object(fprintf)
+add_stdio_entrypoint_object(vprintf)
+add_stdio_entrypoint_object(vfprintf)
diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt
--- a/libc/src/stdio/generic/CMakeLists.txt
+++ b/libc/src/stdio/generic/CMakeLists.txt
@@ -110,3 +110,67 @@
     libc.src.__support.File.file
     libc.src.__support.File.platform_stderr
 )
+
+list(APPEND printf_deps
+      libc.src.__support.arg_list
+      libc.src.stdio.printf_core.vfprintf_internal
+)
+if(LLVM_LIBC_FULL_BUILD)
+  list(APPEND printf_deps
+      libc.src.__support.File.file
+      libc.src.__support.File.platform_file
+      libc.src.__support.File.platform_stdout
+  )
+else()
+  list(APPEND printf_copts "-DLIBC_COPT_PRINTF_USE_SYSTEM_FILE")
+endif()
+
+add_entrypoint_object(
+  printf
+  SRCS
+    printf.cpp
+  HDRS
+    printf.h
+  DEPENDS
+    ${printf_deps}
+  COMPILE_OPTIONS
+    ${printf_copts}
+)
+
+add_entrypoint_object(
+  fprintf
+  SRCS
+    fprintf.cpp
+  HDRS
+    fprintf.h
+  DEPENDS
+    libc.src.__support.arg_list
+    libc.src.stdio.printf_core.vfprintf_internal
+  COMPILE_OPTIONS
+    ${printf_copts}
+)
+
+add_entrypoint_object(
+  vprintf
+  SRCS
+    vprintf.cpp
+  HDRS
+    vprintf.h
+  DEPENDS
+    ${printf_deps}
+  COMPILE_OPTIONS
+    ${printf_copts}
+)
+
+add_entrypoint_object(
+  vfprintf
+  SRCS
+    vfprintf.cpp
+  HDRS
+    vfprintf.h
+  DEPENDS
+    libc.src.__support.arg_list
+    libc.src.stdio.printf_core.vfprintf_internal
+  COMPILE_OPTIONS
+    ${printf_copts}
+)
diff --git a/libc/src/stdio/fprintf.cpp b/libc/src/stdio/generic/fprintf.cpp
rename from libc/src/stdio/fprintf.cpp
rename to libc/src/stdio/generic/fprintf.cpp
diff --git a/libc/src/stdio/printf.cpp b/libc/src/stdio/generic/printf.cpp
rename from libc/src/stdio/printf.cpp
rename to libc/src/stdio/generic/printf.cpp
diff --git a/libc/src/stdio/vfprintf.cpp b/libc/src/stdio/generic/vfprintf.cpp
rename from libc/src/stdio/vfprintf.cpp
rename to libc/src/stdio/generic/vfprintf.cpp
diff --git a/libc/src/stdio/vprintf.cpp b/libc/src/stdio/generic/vprintf.cpp
rename from libc/src/stdio/vprintf.cpp
rename to libc/src/stdio/generic/vprintf.cpp
diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt
--- a/libc/src/stdio/gpu/CMakeLists.txt
+++ b/libc/src/stdio/gpu/CMakeLists.txt
@@ -4,11 +4,30 @@
     file.h
   DEPENDS
     libc.src.__support.common
+    libc.src.__support.RPC.rpc_client
     .stdin
     .stdout
     .stderr
 )
 
+add_header_library(
+  gpu_parser
+  HDRS
+    parser.h
+  DEPENDS
+    libc.src.__support.arg_list
+)
+
+add_header_library(
+  gpu_printf_impl
+  HDRS
+    printf_impl.h
+  DEPENDS
+    libc.src.__support.arg_list
+    .gpu_parser
+    .gpu_file
+)
+
 add_entrypoint_object(
   fopen
   SRCS
@@ -90,3 +109,25 @@
   DEPENDS
     libc.include.stdio
 )
+
+add_entrypoint_object(
+  printf
+  SRCS
+    printf.cpp
+  HDRS
+    ../printf.h
+  DEPENDS
+    libc.include.stdio
+)
+
+add_entrypoint_object(
+  fprintf
+  SRCS
+    fprintf.cpp
+  HDRS
+    ../fprintf.h
+  DEPENDS
+    libc.include.stdio
+    libc.src.__support.arg_list
+    libc.src.stdio.printf_core.parser
+)
diff --git a/libc/src/stdio/gpu/file.h b/libc/src/stdio/gpu/file.h
--- a/libc/src/stdio/gpu/file.h
+++ b/libc/src/stdio/gpu/file.h
@@ -1,4 +1,5 @@
-//===--- GPU helper functions--------------------===//
+//===--- GPU helper functions
+//----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libc/src/stdio/fprintf.cpp b/libc/src/stdio/gpu/fprintf.cpp
rename from libc/src/stdio/fprintf.cpp
rename to libc/src/stdio/gpu/fprintf.cpp
--- a/libc/src/stdio/fprintf.cpp
+++ b/libc/src/stdio/gpu/fprintf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of fprintf -------------------------------*- C++ -*-===//
+//===-- GPU Implementation of fprintf -------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,12 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/stdio/fprintf.h"
+#include "src/stdio/gpu/printf_impl.h"
 
-#include "src/__support/File/file.h"
-#include "src/__support/arg_list.h"
-#include "src/stdio/printf_core/vfprintf_internal.h"
-
-#include <stdarg.h>
 #include <stdio.h>
 
 namespace __llvm_libc {
@@ -22,12 +18,10 @@
                     ...)) {
   va_list vlist;
   va_start(vlist, format);
-  internal::ArgList args(vlist); // This holder class allows for easier copying
-                                 // and pointer semantics, as well as handling
-                                 // destruction automatically.
+  internal::ArgList args(vlist);
   va_end(vlist);
-  int ret_val = printf_core::vfprintf_internal(stream, format, args);
-  return ret_val;
+
+  return static_cast<int>(printf_common(stream, format, args));
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/stdio/gpu/parser.h b/libc/src/stdio/gpu/parser.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/gpu/parser.h
@@ -0,0 +1,192 @@
+//===--------------- Printf format parsing for the GPU --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/arg_list.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+
+namespace gpu {
+
+#ifndef LIBC_COPT_GPU_MOCK_ARG_LIST
+using ArgProvider = internal::ArgList;
+#else  // not defined LIBC_COPT_GPU_MOCK_ARG_LIST
+using ArgProvider = internal::MockArgList;
+#endif // LIBC_COPT_GPU_MOCK_ARG_LIST
+
+// These sizes need to be compatible to simplify parsing the lengths.
+static_assert(sizeof(uintptr_t) == sizeof(long) &&
+                  sizeof(uintptr_t) == sizeof(long long) &&
+                  sizeof(uintptr_t) == sizeof(intmax_t) &&
+                  sizeof(uintptr_t) == sizeof(size_t) &&
+                  sizeof(uintptr_t) == sizeof(ptrdiff_t),
+              "Invalid lengths for target");
+
+enum class LengthModifier { none = 0, l = 1 };
+
+struct Specifier {
+  uintptr_t raw_value; // Argument bits, or the string's address if is_string.
+  bool is_string;      // Set for '%s' arguments.
+  bool has_value;      // False for '%%', malformed input and end of format.
+};
+
+struct MicroParser {
+  LIBC_INLINE MicroParser(const char *format, ArgProvider args)
+      : format(format), args(args) {}
+
+  Specifier get_next_specifier();
+
+  LIBC_INLINE bool end(const Specifier &cur) const {
+    return format[cur_pos] == '\0' && !cur.has_value;
+  }
+
+  LIBC_INLINE size_t get_size(const Specifier &cur) const {
+    // A specifier without a value ('%%', malformed, end) has no argument.
+    if (!cur.has_value)
+      return 0;
+    if (!cur.is_string)
+      return sizeof(uintptr_t);
+    const char *str = reinterpret_cast<const char *>(cur.raw_value);
+    return internal::string_length(str) + 1;
+  }
+
+  LIBC_INLINE const void *get_pointer(const Specifier &cur) const {
+    // Strings are sent from their own storage, everything else by value.
+    if (cur.is_string)
+      return reinterpret_cast<void *>(cur.raw_value);
+    return &cur.raw_value;
+  }
+
+private:
+  LengthModifier parse_length_modifier();
+
+  const char *__restrict const format;
+  ArgProvider args;
+  uint32_t cur_pos = 0;
+};
+
+// Returns true if `c` is a length modifier, a conversion, '%' or ' '.
+LIBC_INLINE static constexpr bool is_format(char c) {
+  switch (c) {
+  case ' ': // NOTE(review): a printf flag, not a length/type; confirm intent.
+  case 't': // Length modifiers.
+  case 'j':
+  case 'z':
+  case 'l':
+  case '%': // Literal percent.
+  case 'd': // Integer conversions.
+  case 'i':
+  case 'o':
+  case 'x':
+  case 'X':
+  case 'u':
+  case 'f': // Floating point conversions.
+  case 'F':
+  case 'e':
+  case 'E':
+  case 'a':
+  case 'A':
+  case 'g':
+  case 'c': // Character conversion.
+  case 'G':
+  case 'p': // Pointer conversion.
+  case 's': // String conversion.
+    return true;
+  default:
+    return false;
+  }
+}
+
+LIBC_INLINE LengthModifier MicroParser::parse_length_modifier() {
+  // Skip all characters that aren't related to the length or type.
+  while (format[cur_pos] != '\0' && !is_format(format[cur_pos]))
+    ++cur_pos;
+
+  // We are only concerned with whether or not the length specifier is larger
+  // than a regular integer.
+  switch (format[cur_pos]) {
+  case 'l':
+    // Consume the first 'l' of "ll"; the shared increment below eats the rest.
+    if (format[cur_pos + 1] == 'l')
+      ++cur_pos;
+    [[fallthrough]];
+  case 't':
+  case 'j':
+  case 'z':
+    ++cur_pos;
+    return LengthModifier::l;
+  default:
+    // No wide length modifier; leave cur_pos on the current character.
+    return LengthModifier::none;
+  }
+}
+
+// Advances past the next conversion and reads its argument, if it takes one.
+LIBC_INLINE Specifier MicroParser::get_next_specifier() {
+  Specifier specifier{};
+  // Skip any characters until we reach a control character or the end.
+  while (format[cur_pos] != '\0' && format[cur_pos] != '%')
+    ++cur_pos;
+
+  if (format[cur_pos] != '\0')
+    cur_pos++;
+  LengthModifier lm = parse_length_modifier();
+
+  // We use the type and length modifier to access the variadic argument
+  // appropriately. All arguments are promoted to a simple integer.
+  specifier.has_value = true;
+  switch (format[cur_pos]) {
+  case 'c':
+    specifier.raw_value = static_cast<uintptr_t>(args.next_var<uint32_t>());
+    break;
+  case 'd':
+  case 'i':
+  case 'o':
+  case 'x':
+  case 'X':
+  case 'u':
+    if (lm == LengthModifier::none)
+      specifier.raw_value = static_cast<uintptr_t>(args.next_var<uint32_t>());
+    else
+      specifier.raw_value = static_cast<uintptr_t>(args.next_var<uint64_t>());
+    break;
+  case 'f':
+  case 'F':
+  case 'e':
+  case 'E':
+  case 'a':
+  case 'A':
+  case 'g':
+  case 'G': {
+    // Copy the bits of the floating point value without breaking aliasing.
+    double p = args.next_var<double>();
+    static_assert(sizeof(p) == sizeof(specifier.raw_value), "size mismatch");
+    __builtin_memcpy(&specifier.raw_value, &p, sizeof(p));
+    break;
+  }
+  case 'p':
+    specifier.raw_value = reinterpret_cast<uintptr_t>(args.next_var<void *>());
+    break;
+  case 's':
+    // Strings require special handling as they cannot simply be promoted.
+    specifier.raw_value = reinterpret_cast<uintptr_t>(args.next_var<void *>());
+    specifier.is_string = true;
+    break;
+  default:
+    // This was a malformed input or a '%' literal.
+    specifier.has_value = false;
+    break;
+  }
+  if (format[cur_pos] != '\0')
+    ++cur_pos;
+  return specifier;
+}
+
+} // namespace gpu
+
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/gpu/printf.cpp b/libc/src/stdio/gpu/printf.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/gpu/printf.cpp
@@ -0,0 +1,25 @@
+//===-- GPU Implementation of printf --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/printf.h"
+#include "src/stdio/gpu/printf_impl.h"
+
+#include <stdio.h>
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  internal::ArgList args(vlist); // Wraps the variadic arguments for parsing.
+  va_end(vlist);
+
+  return static_cast<int>(printf_common(stdout, format, args)); // Narrowed.
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/gpu/printf_impl.h b/libc/src/stdio/gpu/printf_impl.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/gpu/printf_impl.h
@@ -0,0 +1,55 @@
+//===------------- RPC based printf implementation for the GPU ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/RPC/rpc_client.h"
+#include "src/__support/arg_list.h"
+#include "src/stdio/gpu/parser.h"
+
+#include <stdio.h>
+
+namespace __llvm_libc {
+
+template <unsigned opcode>
+LIBC_INLINE uint64_t printf_impl(::FILE *__restrict stream,
+                                 const char *__restrict format,
+                                 internal::ArgList &args) {
+  rpc::Client::Port port = rpc::client.open<opcode>();
+  if constexpr (opcode == RPC_PRINTF_TO_STREAM) // Stream handle goes first.
+    port.send([&](rpc::Buffer *buffer) {
+      buffer->data[0] = reinterpret_cast<uintptr_t>(stream);
+    });
+
+  port.send_n(format, internal::string_length(format) + 1); // Includes NUL.
+
+  uint64_t mask = gpu::get_lane_mask();
+  gpu::MicroParser parser(format, args);
+  for (gpu::Specifier cur = parser.get_next_specifier(); // One send per spec.
+       gpu::ballot(mask, !parser.end(cur)); cur = parser.get_next_specifier())
+    port.send_n(parser.get_pointer(cur), parser.get_size(cur));
+
+  uint64_t ret = 0;
+  port.recv([&](rpc::Buffer *buffer) {
+    ret = reinterpret_cast<uint64_t *>(buffer->data)[0]; // Server's result.
+  });
+  port.close();
+
+  return ret;
+}
+
+// Selects the RPC opcode matching the destination stream and forwards the call.
+LIBC_INLINE uint64_t printf_common(::FILE *__restrict stream,
+                                   const char *__restrict format,
+                                   internal::ArgList &args) {
+  if (stream == stdout)
+    return printf_impl<RPC_PRINTF_TO_STDOUT>(stdout, format, args);
+  if (stream == stderr)
+    return printf_impl<RPC_PRINTF_TO_STDERR>(stderr, format, args);
+  return printf_impl<RPC_PRINTF_TO_STREAM>(stream, format, args);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -21,11 +21,13 @@
 namespace __llvm_libc {
 namespace printf_core {
 
-#ifndef LIBC_COPT_MOCK_ARG_LIST
-using ArgProvider = internal::ArgList;
-#else  // not defined LIBC_COPT_MOCK_ARG_LIST
+#if defined(LIBC_COPT_ARRAY_ARG_LIST)
+using ArgProvider = internal::ArrayArgList;
+#elif defined(LIBC_COPT_MOCK_ARG_LIST)
 using ArgProvider = internal::MockArgList;
-#endif // LIBC_COPT_MOCK_ARG_LIST
+#else
+using ArgProvider = internal::ArgList;
+#endif
 
 class Parser {
   const char *__restrict str;
diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp
--- a/libc/src/stdio/printf_core/parser.cpp
+++ b/libc/src/stdio/printf_core/parser.cpp
@@ -173,9 +173,11 @@
     case ('n'):
 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
     case ('p'):
-    case ('s'):
       WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
       break;
+    case ('s'):
+      WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index);
+      break;
     default:
       // if the conversion is undefined, change this to a raw section.
       section.has_conv = false;
diff --git a/libc/test/integration/startup/gpu/CMakeLists.txt b/libc/test/integration/startup/gpu/CMakeLists.txt
--- a/libc/test/integration/startup/gpu/CMakeLists.txt
+++ b/libc/test/integration/startup/gpu/CMakeLists.txt
@@ -1,6 +1,9 @@
 add_custom_target(libc-startup-tests)
 add_dependencies(libc-integration-tests libc-startup-tests)
 
+# Create an output directory for any temporary test files.
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testdata)
+
 add_integration_test(
   startup_args_test
   SUITE libc-startup-tests
@@ -53,3 +56,22 @@
    --threads 32
    --blocks 8
 )
+
+add_integration_test(
+  startup_rpc_printf_test
+  SUITE libc-startup-tests
+  SRCS
+   rpc_printf_test.cpp
+  DEPENDS
+    libc.include.stdio
+    libc.src.__support.RPC.rpc_client
+    libc.src.__support.GPU.utils
+    libc.src.stdio.stdout
+    libc.src.stdio.stderr
+    libc.src.stdio.fprintf
+    libc.src.stdio.fopen
+    libc.src.stdio.fclose
+  LOADER_ARGS
+   --threads 32
+   --blocks 4
+)
diff --git a/libc/test/integration/startup/gpu/rpc_printf_test.cpp b/libc/test/integration/startup/gpu/rpc_printf_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/integration/startup/gpu/rpc_printf_test.cpp
@@ -0,0 +1,47 @@
+//===-- RPC test to check args to printf ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/GPU/utils.h"
+#include "src/stdio/fopen.h"
+#include "src/stdio/fprintf.h"
+#include "test/IntegrationTest/test.h"
+#include <stdint.h>
+
+using namespace __llvm_libc;
+
+FILE *file = __llvm_libc::fopen("testdata/test_data.txt", "w");
+
+TEST_MAIN(int argc, char **argv, char **envp) {
+  ASSERT_TRUE(file && "failed to open file");
+
+  int written = 0;
+  written = __llvm_libc::fprintf(file, "A simple string\n");
+  ASSERT_EQ(written, 16); // 15 characters plus the newline.
+
+  written = __llvm_libc::fprintf(file, "%s", "A simple string\n");
+  ASSERT_EQ(written, 16); // Same text, passed as a string argument.
+
+  written = __llvm_libc::fprintf(file, "%8ld\n", gpu::get_thread_id());
+  ASSERT_EQ(written, 9); // Padded to a width of 8 plus the newline.
+
+  written = __llvm_libc::fprintf(file, "%00000%%c", 'c');
+  ASSERT_EQ(written, 2); // A literal '%' followed by 'c'.
+
+  written = __llvm_libc::fprintf(file, "%d%c%.1f\n", 1, 'c', 1.0);
+  ASSERT_EQ(written, 6); // "1c1.0" plus the newline.
+
+  const char *str = gpu::get_thread_id() % 2 ? "%s" : "%20ld\n";
+  written = __llvm_libc::fprintf(file, str, "string\n");
+  ASSERT_EQ(written, gpu::get_thread_id() % 2 ? 7 : 21); // Differs per thread.
+
+  const char *arg = gpu::get_thread_id() % 2 ? "string\n" : "%s";
+  written = __llvm_libc::fprintf(file, arg, "string\n");
+  ASSERT_EQ(written, 7); // "string\n" either way.
+
+  return 0;
+}
diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -151,7 +151,7 @@
  set(fprintf_test_copts "-DLIBC_COPT_PRINTF_USE_SYSTEM_FILE")
 endif()
 
-add_libc_unittest(
+add_libc_test(
   fprintf_test
   SUITE
     libc_stdio_unittests
@@ -173,6 +173,8 @@
     printf_test.cpp
   DEPENDS
     libc.src.stdio.printf
+    libc.src.stdio.stdout
+    libc.src.stdio.stderr
 )
 
 add_fp_unittest(
diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt
--- a/libc/utils/gpu/server/CMakeLists.txt
+++ b/libc/utils/gpu/server/CMakeLists.txt
@@ -1,4 +1,9 @@
-add_library(llvmlibc_rpc_server STATIC rpc_server.cpp)
+add_library(llvmlibc_rpc_server 
+  STATIC 
+  ${LIBC_SOURCE_DIR}/src/stdio/printf_core/parser.cpp
+  ${LIBC_SOURCE_DIR}/src/stdio/printf_core/writer.cpp
+  ${LIBC_SOURCE_DIR}/src/stdio/printf_core/converter.cpp
+  rpc_server.cpp)
 
 # Include the RPC implemenation from libc.
 target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR})
@@ -9,6 +14,13 @@
 target_compile_options(llvmlibc_rpc_server PUBLIC
                        $<$<CXX_COMPILER_ID:GNU>:-Wno-attributes>)
 
+target_compile_definitions(llvmlibc_rpc_server 
+                           PRIVATE 
+                           LIBC_COPT_GPU_MOCK_ARG_LIST
+                           LIBC_COPT_ARRAY_ARG_LIST
+                           LIBC_COPT_PRINTF_DISABLE_WRITE_INT
+                           LIBC_COPT_PRINTF_DISABLE_INDEX_MODE)
+
 # Install the server and associated header.
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/rpc_server.h
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/gpu-none-llvm/
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -8,6 +8,12 @@
 
 #include "rpc_server.h"
 
+#include "src/__support/arg_list.h"
+#include "src/stdio/gpu/parser.h"
+#include "src/stdio/printf_core/converter.h"
+#include "src/stdio/printf_core/parser.h"
+#include "src/stdio/printf_core/writer.h"
+
 #include "src/__support/RPC/rpc.h"
 #include <atomic>
 #include <cstdio>
@@ -19,6 +25,7 @@
 #include <vector>
 
 using namespace __llvm_libc;
+using namespace __llvm_libc::printf_core;
 
 static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
               "Buffer size mismatch");
@@ -26,6 +33,102 @@
 static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
               "Incorrect maximum port count");
 
+// Services a printf RPC: receives the stream (if any), the format string and
+// one packet per specifier from each lane, then formats and writes the output.
+template <uint32_t lane_size>
+static void handle_printf(typename rpc::Server<lane_size>::Port &port) {
+  uint64_t total_sizes[lane_size] = {0};
+  uint64_t sizes[lane_size] = {0};
+  void *format[lane_size] = {nullptr};
+  FILE *files[lane_size] = {nullptr};
+
+  // Get the appropriate output stream to use.
+  if (port.get_opcode() == RPC_PRINTF_TO_STREAM)
+    port.recv([&](rpc::Buffer *buffer, uint32_t id) {
+      files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
+    });
+  else if (port.get_opcode() == RPC_PRINTF_TO_STDOUT)
+    std::fill(files, files + lane_size, stdout);
+  else
+    std::fill(files, files + lane_size, stderr);
+
+  // Receive the format string from the client.
+  port.recv_n(format, sizes, [&](uint64_t size) { return new char[size]; });
+
+  for (uint32_t lane = 0; lane < lane_size; ++lane)
+    total_sizes[lane] += rpc::align_up(sizes[lane], sizeof(uintptr_t));
+
+  // Parse the formatting string using the same parser the client uses. This
+  // tells us exactly how many packets we need to be receiving from the client.
+  uint32_t num_specifiers = 0;
+  for (uint32_t lane = 0; lane < lane_size; ++lane) {
+    if (!format[lane])
+      continue;
+    __llvm_libc::internal::MockArgList args;
+    uint32_t count = 0;
+    gpu::MicroParser parser(reinterpret_cast<const char *>(format[lane]), args);
+    for (gpu::Specifier cur = parser.get_next_specifier(); !parser.end(cur);
+         cur = parser.get_next_specifier())
+      count++;
+    num_specifiers = std::max(num_specifiers, count);
+  }
+
+  // Receive all the arguments from the client and allocate storage for them.
+  std::vector<void *> all_args[lane_size];
+  for (uint32_t i = 0; i < num_specifiers; ++i) {
+    void *args[lane_size] = {nullptr};
+    port.recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });
+
+    for (uint32_t lane = 0; lane < lane_size; ++lane) {
+      if (sizes[lane] > 0)
+        all_args[lane].push_back(args[lane]);
+      else // Empty packets carry no argument; release them right away.
+        delete[] reinterpret_cast<char *>(args[lane]);
+      total_sizes[lane] += rpc::align_up(sizes[lane], sizeof(uintptr_t));
+    }
+  }
+
+  int results[lane_size] = {0};
+  for (uint32_t lane = 0; lane < lane_size; ++lane) {
+    if (!format[lane])
+      continue;
+    // We assume that twice the input length will be enough to fit the string.
+    uint64_t buffer_size = std::max<uint64_t>(256, 2 * total_sizes[lane]);
+    std::unique_ptr<char[]> buffer(new char[buffer_size]);
+    WriteBuffer wb(buffer.get(), buffer_size);
+    Writer writer(&wb);
+
+    internal::ArrayArgList args(all_args[lane].data());
+    Parser parser(reinterpret_cast<const char *>(format[lane]), args);
+
+    // Format the output using the arguments copied from the client.
+    for (FormatSection cur_section = parser.get_next_section();
+         !cur_section.raw_string.empty();
+         cur_section = parser.get_next_section()) {
+      if (cur_section.has_conv)
+        convert(&writer, cur_section);
+      else
+        writer.write(cur_section.raw_string);
+    }
+    // Clamp the length so fwrite never reads past the end of the buffer.
+    uint64_t len = std::min<uint64_t>(writer.get_chars_written(), buffer_size);
+    results[lane] =
+        static_cast<int>(fwrite(buffer.get(), 1, len, files[lane]));
+  }
+
+  port.send(
+      [&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = results[id]; });
+
+  for (uint32_t lane = 0; lane < lane_size; ++lane) {
+    if (!format[lane])
+      continue;
+    // Everything was allocated as 'new char[]', so release it with that type.
+    delete[] reinterpret_cast<char *>(format[lane]);
+    for (void *ptr : all_args[lane])
+      delete[] reinterpret_cast<char *>(ptr);
+  }
+}
+
 // The client needs to support different lane sizes for the SIMT model. Because
 // of this we need to select between the possible sizes that the client can use.
 struct Server {
@@ -164,10 +267,14 @@
       });
       break;
     }
-    case RPC_NOOP: {
+    case RPC_PRINTF_TO_STDOUT:
+    case RPC_PRINTF_TO_STDERR:
+    case RPC_PRINTF_TO_STREAM:
+      handle_printf<lane_size>(*port);
+      break;
+    case RPC_NOOP:
       port->recv([](rpc::Buffer *) {});
       break;
-    }
     default: {
       auto handler =
           callbacks.find(static_cast<rpc_opcode_t>(port->get_opcode()));