Index: clangd/ClangdServer.cpp
===================================================================
--- clangd/ClangdServer.cpp
+++ clangd/ClangdServer.cpp
@@ -232,14 +232,8 @@
 
     RefactoringResultCollector ResultCollector;
     const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
-    const FileEntry *FE =
-        SourceMgr.getFileEntryForID(SourceMgr.getMainFileID());
-    if (!FE)
-      return CB(llvm::make_error<llvm::StringError>(
-          "rename called for non-added document",
-          llvm::errc::invalid_argument));
     SourceLocation SourceLocationBeg =
-        clangd::getBeginningOfIdentifier(AST, Pos, FE);
+        clangd::getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
     tooling::RefactoringRuleContext Context(
         AST.getASTContext().getSourceManager());
     Context.setASTContext(AST.getASTContext());
Index: clangd/ClangdUnit.h
===================================================================
--- clangd/ClangdUnit.h
+++ clangd/ClangdUnit.h
@@ -173,8 +173,9 @@
 };
 
 /// Get the beginning SourceLocation at a specified \p Pos.
+/// Invalid if Pos is invalid; returns Pos's location if not on an identifier.
 SourceLocation getBeginningOfIdentifier(ParsedAST &Unit, const Position &Pos,
-                                        const FileEntry *FE);
+                                        const FileID FID);
 
 /// For testing/debugging purposes. Note that this method deserializes all
 /// unserialized Decls, so use with care.
Index: clangd/ClangdUnit.cpp
===================================================================
--- clangd/ClangdUnit.cpp
+++ clangd/ClangdUnit.cpp
@@ -215,19 +215,6 @@
                    std::move(IncLocations));
 }
 
-namespace {
-
-SourceLocation getMacroArgExpandedLocation(const SourceManager &Mgr,
-                                           const FileEntry *FE, Position Pos) {
-  // The language server protocol uses zero-based line and column numbers.
-  // Clang uses one-based numbers.
-  SourceLocation InputLoc =
-      Mgr.translateFileLineCol(FE, Pos.line + 1, Pos.character + 1);
-  return Mgr.getMacroArgExpandedLocation(InputLoc);
-}
-
-} // namespace
-
 void ParsedAST::ensurePreambleDeclsDeserialized() {
   if (PreambleDeclsDeserialized || !Preamble)
     return;
@@ -470,40 +457,34 @@
 
 SourceLocation clangd::getBeginningOfIdentifier(ParsedAST &Unit,
                                                 const Position &Pos,
-                                                const FileEntry *FE) {
+                                                const FileID FID) {
   const ASTContext &AST = Unit.getASTContext();
   const SourceManager &SourceMgr = AST.getSourceManager();
-
-  SourceLocation InputLocation =
-      getMacroArgExpandedLocation(SourceMgr, FE, Pos);
-  if (Pos.character == 0) {
-    return InputLocation;
-  }
-
-  // This handle cases where the position is in the middle of a token or right
-  // after the end of a token. In theory we could just use GetBeginningOfToken
-  // to find the start of the token at the input position, but this doesn't
-  // work when right after the end, i.e. foo|.
-  // So try to go back by one and see if we're still inside an identifier
-  // token. If so, Take the beginning of this token.
-  // (It should be the same identifier because you can't have two adjacent
-  // identifiers without another token in between.)
-  Position PosCharBehind = Pos;
-  --PosCharBehind.character;
-
-  SourceLocation PeekBeforeLocation =
-      getMacroArgExpandedLocation(SourceMgr, FE, PosCharBehind);
-  Token Result;
-  if (Lexer::getRawToken(PeekBeforeLocation, Result, SourceMgr,
-                         AST.getLangOpts(), false)) {
-    // getRawToken failed, just use InputLocation.
-    return InputLocation;
+  auto Offset = positionToOffset(SourceMgr.getBufferData(FID), Pos);
+  if (!Offset) {
+    log("getBeginningOfIdentifier: " + toString(Offset.takeError()));
+    return SourceLocation();
   }
-
-  if (Result.is(tok::raw_identifier)) {
-    return Lexer::GetBeginningOfToken(PeekBeforeLocation, SourceMgr,
-                                      AST.getLangOpts());
-  }
-
-  return InputLocation;
+  SourceLocation InputLoc = SourceMgr.getComposedLoc(FID, *Offset);
+
+  // GetBeginningOfToken(pos) is almost what we want, but does the wrong thing
+  // if the cursor is at the end of the identifier.
+  // Instead, we lex at GetBeginningOfToken(pos - 1). The cases are:
+  //  1) at the beginning of an identifier, we'll be looking at something
+  //  that isn't an identifier.
+  //  2) at the middle or end of an identifier, we get the identifier.
+  //  3) anywhere outside an identifier, we'll get some non-identifier thing.
+  // We can't actually distinguish cases 1 and 3, but returning the original
+  // location is correct for both!
+  if (*Offset == 0) // Case 1 or 3.
+    return SourceMgr.getMacroArgExpandedLocation(InputLoc);
+  SourceLocation Before =
+      SourceMgr.getMacroArgExpandedLocation(InputLoc.getLocWithOffset(-1));
+  Before = Lexer::GetBeginningOfToken(Before, SourceMgr, AST.getLangOpts());
+  Token Tok;
+  if (Before.isValid() &&
+      !Lexer::getRawToken(Before, Tok, SourceMgr, AST.getLangOpts(), false) &&
+      Tok.is(tok::raw_identifier))
+    return Before;                                        // Case 2.
+  return SourceMgr.getMacroArgExpandedLocation(InputLoc); // Case 1 or 3.
 }
Index: clangd/CodeComplete.cpp
===================================================================
--- clangd/CodeComplete.cpp
+++ clangd/CodeComplete.cpp
@@ -729,8 +729,15 @@
   FrontendOpts.SkipFunctionBodies = true;
   FrontendOpts.CodeCompleteOpts = Options;
   FrontendOpts.CodeCompletionAt.FileName = Input.FileName;
-  FrontendOpts.CodeCompletionAt.Line = Input.Pos.line + 1;
-  FrontendOpts.CodeCompletionAt.Column = Input.Pos.character + 1;
+  auto Offset = positionToOffset(Input.Contents, Input.Pos);
+  if (!Offset) {
+    log("Code completion position was invalid " +
+        llvm::toString(Offset.takeError()));
+    return false;
+  }
+  std::tie(FrontendOpts.CodeCompletionAt.Line,
+           FrontendOpts.CodeCompletionAt.Column) =
+      offsetToClangLineColumn(Input.Contents, *Offset);
 
   Clang->setCodeCompletionConsumer(Consumer.release());
 
Index: clangd/Protocol.h
===================================================================
--- clangd/Protocol.h
+++ clangd/Protocol.h
@@ -91,6 +91,8 @@
   int line = 0;
 
   /// Character offset on a line in a document (zero-based).
+  /// WARNING: this is in UTF-16 code units, not bytes or characters!
+  /// Use the functions in SourceCode.h to construct/interpret Positions.
   int character = 0;
 
   friend bool operator==(const Position &LHS, const Position &RHS) {
Index: clangd/SourceCode.h
===================================================================
--- clangd/SourceCode.h
+++ clangd/SourceCode.h
@@ -23,14 +23,9 @@
 
 /// Turn a [line, column] pair into an offset in Code.
 ///
-/// If the character value is greater than the line length, the behavior depends
-/// on AllowColumnsBeyondLineLength:
-///
-/// - if true: default back to the end of the line
-/// - if false: return an error
-///
-/// If the line number is greater than the number of lines in the document,
-/// always return an error.
+/// If P.character exceeds the line length, returns the offset at end-of-line.
+/// (If !AllowColumnsBeyondLineLength, then returns an error instead).
+/// If the line number is out of range, returns an error.
 ///
 /// The returned value is in the range [0, Code.size()].
 llvm::Expected<size_t>
@@ -38,7 +33,7 @@
                  bool AllowColumnsBeyondLineLength = true);
 
 /// Turn an offset in Code into a [line, column] pair.
-/// FIXME: This should return an error if the offset is invalid.
+/// The offset must be in range [0, Code.size()].
 Position offsetToPosition(llvm::StringRef Code, size_t Offset);
 
 /// Turn a SourceLocation into a [line, column] pair.
@@ -49,6 +44,12 @@
 // Note that clang also uses closed source ranges, which this can't handle!
 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R);
 
+// Converts an offset to a clang line/column (1-based, columns are bytes).
+// The offset must be in range [0, Code.size()].
+// Prefer to use SourceManager if one is available.
+std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
+                                                  size_t Offset);
+
 /// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no
 /// qualifier.
 std::pair<llvm::StringRef, llvm::StringRef>
Index: clangd/SourceCode.cpp
===================================================================
--- clangd/SourceCode.cpp
+++ clangd/SourceCode.cpp
@@ -16,6 +16,66 @@
 namespace clangd {
 using namespace llvm;
 
+// Here be dragons. LSP positions use columns measured in *UTF-16 code units*!
+// Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
+
+// Iterates over unicode codepoints in the (UTF-8) string. For each,
+// invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
+// Returns true if CB returned true, false if we hit the end of string.
+template <typename Callback>
+bool iterateCodepoints(StringRef U8, const Callback &CB) {
+  for (size_t I = 0; I < U8.size();) {
+    unsigned char C = static_cast<unsigned char>(U8[I]);
+    if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
+      if (CB(1, 1))
+        return true;
+      ++I;
+      continue;
+    }
+    static_assert(std::numeric_limits<unsigned char>::is_integer, "x");
+    size_t UTF8Length = countLeadingOnes<unsigned char>(C);
+    // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
+    // 11111xxx is not valid UTF-8 at all.
+    assert((UTF8Length >= 2 && UTF8Length <= 4) &&
+           "Invalid UTF-8, or transcoding bug?");
+    I += UTF8Length; // Skip over all trailing bytes.
+    // A codepoint takes two UTF-16 code units if it's astral (outside BMP).
+    // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
+    if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
+      return true;
+  }
+  return false;
+}
+
+// Returns the offset into the string that matches \p Units UTF-16 code units.
+// Conceptually, this converts to UTF-16, truncates to \p Units, converts back
+// to UTF-8, and returns the length in bytes.
+static size_t measureUTF16(StringRef U8, int Units, bool &Valid) {
+  size_t Result = 0;
+  Valid = Units == 0 || iterateCodepoints(U8, [&](int U8Len, int U16Len) {
+            Result += U8Len;
+            Units -= U16Len;
+            return Units <= 0;
+          });
+  if (Units < 0) // Offset was into the middle of a surrogate pair.
+    Valid = false;
+  // Don't return an out-of-range index if we overran.
+  return std::min(Result, U8.size());
+}
+
+// Counts the number of UTF-16 code units needed to represent a string.
+// Like most strings in clangd, the input is UTF-8 encoded.
+static size_t utf16Len(StringRef U8) {
+  // A codepoint takes two UTF-16 code units if it's astral (outside BMP).
+  // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
+  size_t Count = 0;
+  iterateCodepoints(U8, [&](int U8Len, int U16Len) {
+    Count += U16Len;
+    return false;
+  });
+  return Count;
+}
+
 llvm::Expected<size_t> positionToOffset(StringRef Code, Position P,
                                         bool AllowColumnsBeyondLineLength) {
   if (P.line < 0)
@@ -40,12 +100,15 @@
   if (NextNL == StringRef::npos)
     NextNL = Code.size();
 
-  if (StartOfLine + P.character > NextNL && !AllowColumnsBeyondLineLength)
+  bool Valid;
+  size_t ByteOffsetInLine = measureUTF16(
+      Code.substr(StartOfLine, NextNL - StartOfLine), P.character, Valid);
+  if (!Valid && !AllowColumnsBeyondLineLength)
     return llvm::make_error<llvm::StringError>(
-        llvm::formatv("Character value is out of range ({0})", P.character),
+        llvm::formatv("UTF-16 offset {0} is invalid for line {1}", P.character,
+                      P.line),
         llvm::errc::invalid_argument);
-  // FIXME: officially P.character counts UTF-16 code units, not UTF-8 bytes!
-  return std::min(NextNL, StartOfLine + P.character);
+  return StartOfLine + ByteOffsetInLine;
 }
 
 Position offsetToPosition(StringRef Code, size_t Offset) {
@@ -54,17 +117,25 @@
   int Lines = Before.count('\n');
   size_t PrevNL = Before.rfind('\n');
   size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1);
-  // FIXME: officially character counts UTF-16 code units, not UTF-8 bytes!
   Position Pos;
   Pos.line = Lines;
-  Pos.character = static_cast<int>(Offset - StartOfLine);
+  Pos.character = utf16Len(Before.substr(StartOfLine));
   return Pos;
 }
 
 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
+  // We use the SourceManager's line tables, but its column number is in bytes.
+  auto LocInfo = SM.getDecomposedSpellingLoc(Loc);
   Position P;
-  P.line = static_cast<int>(SM.getSpellingLineNumber(Loc)) - 1;
-  P.character = static_cast<int>(SM.getSpellingColumnNumber(Loc)) - 1;
+  P.line =
+      static_cast<int>(SM.getLineNumber(LocInfo.first, LocInfo.second)) - 1;
+  bool Invalid = false;
+  StringRef Code = SM.getBufferData(LocInfo.first, &Invalid);
+  if (!Invalid) {
+    auto ColumnInBytes = SM.getColumnNumber(LocInfo.first, LocInfo.second) - 1;
+    P.character =
+        utf16Len(Code.substr(LocInfo.second - ColumnInBytes, ColumnInBytes));
+  }
   return P;
 }
 
@@ -76,6 +147,16 @@
   return {Begin, End};
 }
 
+std::pair<size_t, size_t> offsetToClangLineColumn(StringRef Code,
+                                                  size_t Offset) {
+  Offset = std::min(Code.size(), Offset);
+  StringRef Before = Code.substr(0, Offset);
+  int Lines = Before.count('\n');
+  size_t PrevNL = Before.rfind('\n');
+  size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1);
+  return {Lines + 1, Offset - StartOfLine + 1};
+}
+
 std::pair<llvm::StringRef, llvm::StringRef>
 splitQualifiedName(llvm::StringRef QName) {
   size_t Pos = QName.rfind("::");
Index: clangd/XRefs.cpp
===================================================================
--- clangd/XRefs.cpp
+++ clangd/XRefs.cpp
@@ -164,11 +164,8 @@
 
 std::vector<Location> findDefinitions(ParsedAST &AST, Position Pos) {
   const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
-  const FileEntry *FE = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID());
-  if (!FE)
-    return {};
-
-  SourceLocation SourceLocationBeg = getBeginningOfIdentifier(AST, Pos, FE);
+  SourceLocation SourceLocationBeg =
+      getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
 
   std::vector<Location> Result;
   // Handle goto definition for #include.
@@ -280,11 +277,8 @@
 std::vector<DocumentHighlight> findDocumentHighlights(ParsedAST &AST,
                                                       Position Pos) {
   const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
-  const FileEntry *FE = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID());
-  if (!FE)
-    return {};
-
-  SourceLocation SourceLocationBeg = getBeginningOfIdentifier(AST, Pos, FE);
+  SourceLocation SourceLocationBeg =
+      getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
 
   DeclarationAndMacrosFinder DeclMacrosFinder(llvm::errs(), SourceLocationBeg,
                                               AST.getASTContext(),
@@ -413,11 +407,8 @@
 
 Hover getHover(ParsedAST &AST, Position Pos) {
   const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
-  const FileEntry *FE = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID());
-  if (FE == nullptr)
-    return Hover();
-
-  SourceLocation SourceLocationBeg = getBeginningOfIdentifier(AST, Pos, FE);
+  SourceLocation SourceLocationBeg =
+      getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
   DeclarationAndMacrosFinder DeclMacrosFinder(llvm::errs(), SourceLocationBeg,
                                               AST.getASTContext(),
                                               AST.getPreprocessor());
Index: test/clangd/rename.test
===================================================================
--- test/clangd/rename.test
+++ test/clangd/rename.test
@@ -36,4 +36,4 @@
 ---
 {"jsonrpc":"2.0","id":3,"method":"shutdown"}
 ---
-{"jsonrpc":"2.0":"method":"exit"}
+{"jsonrpc":"2.0","method":"exit"}
Index: unittests/clangd/ClangdUnitTests.cpp
===================================================================
--- unittests/clangd/ClangdUnitTests.cpp
+++ unittests/clangd/ClangdUnitTests.cpp
@@ -9,6 +9,7 @@
 
 #include "Annotations.h"
 #include "ClangdUnit.h"
+#include "SourceCode.h"
 #include "TestFS.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/PCHContainerOperations.h"
@@ -216,6 +217,28 @@
                   Pair(EqualToLSPDiag(NoteInMainLSP), IsEmpty())));
 }
 
+TEST(ClangdUnitTest, GetBeginningOfIdentifier) {
+  // First ^ is the expected beginning, last is the search position.
+  for (const char *Text : {
+           "int ^f^oo();", // inside identifier
+           "int ^foo();",  // beginning of identifier
+           "int ^foo^();", // end of identifier
+           "int foo(^);",  // non-identifier
+           "^int foo();",  // beginning of file (can't back up)
+           "int ^f0^0();", // after a digit (lexing at N-1 is wrong)
+           "int ^λλ^λ();", // UTF-8 handled properly when backing up
+       }) {
+    Annotations TestCase(Text);
+    auto AST = build(TestCase.code());
+    const auto &SourceMgr = AST.getASTContext().getSourceManager();
+    SourceLocation Actual = getBeginningOfIdentifier(
+        AST, TestCase.points().back(), SourceMgr.getMainFileID());
+    Position ActualPos =
+        offsetToPosition(TestCase.code(), SourceMgr.getFileOffset(Actual));
+    EXPECT_EQ(TestCase.points().front(), ActualPos) << Text;
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: unittests/clangd/DraftStoreTests.cpp
===================================================================
--- unittests/clangd/DraftStoreTests.cpp
+++ unittests/clangd/DraftStoreTests.cpp
@@ -241,7 +241,7 @@
 
   EXPECT_TRUE(!Result);
   EXPECT_EQ(llvm::toString(Result.takeError()),
-            "Character value is out of range (100)");
+            "UTF-16 offset 100 is invalid for line 0");
 }
 
 TEST(DraftStoreIncrementalUpdateTest, EndCharOutOfRange) {
@@ -262,7 +262,7 @@
 
   EXPECT_TRUE(!Result);
   EXPECT_EQ(llvm::toString(Result.takeError()),
-            "Character value is out of range (100)");
+            "UTF-16 offset 100 is invalid for line 0");
 }
 
 TEST(DraftStoreIncrementalUpdateTest, StartLineOutOfRange) {
@@ -338,7 +338,7 @@
 
   EXPECT_TRUE(!Result);
   EXPECT_EQ(llvm::toString(Result.takeError()),
-            "Character value is out of range (100)");
+            "UTF-16 offset 100 is invalid for line 0");
 
   llvm::Optional<std::string> Contents = DS.getDraft(File);
   EXPECT_TRUE(Contents);
Index: unittests/clangd/SourceCodeTests.cpp
===================================================================
--- unittests/clangd/SourceCodeTests.cpp
+++ unittests/clangd/SourceCodeTests.cpp
@@ -24,9 +24,10 @@
   return arg.line == Line && arg.character == Col;
 }
 
+// The = → 🡆 below are ASCII (1 byte), BMP (3 bytes), and astral (4 bytes).
 const char File[] = R"(0:0 = 0
-1:0 = 8
-2:0 = 16)";
+1:0 → 8
+2:0 🡆 18)";
 
 /// A helper to make tests easier to read.
 Position position(int line, int character) {
@@ -66,25 +67,31 @@
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3), false),
                        HasValue(11));
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 6)),
-                       HasValue(14)); // last character
+                       HasValue(16)); // last character
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 7)),
-                       HasValue(15)); // the newline itself
+                       HasValue(17)); // the newline itself
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8)),
-                       HasValue(15)); // out of range
+                       HasValue(17)); // out of range
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8), false),
                        Failed()); // out of range
   // last line
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, -1)),
                        Failed()); // out of range
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 0)),
-                       HasValue(16)); // first character
+                       HasValue(18)); // first character
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 3)),
-                       HasValue(19)); // middle character
-  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 7)),
-                       HasValue(23)); // last character
+                       HasValue(21)); // middle character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 5), false),
+                       Failed()); // middle of surrogate pair
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 5)),
+                       HasValue(26)); // middle of surrogate pair
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 6), false),
+                       HasValue(26)); // end of surrogate pair
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 8)),
-                       HasValue(24)); // EOF
-  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 9), false),
+                       HasValue(28)); // last character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 9)),
+                       HasValue(29)); // EOF
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 10), false),
                        Failed()); // out of range
   // line out of bounds
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), Failed());
@@ -97,14 +104,19 @@
   EXPECT_THAT(offsetToPosition(File, 6), Pos(0, 6)) << "end of first line";
   EXPECT_THAT(offsetToPosition(File, 7), Pos(0, 7)) << "first newline";
   EXPECT_THAT(offsetToPosition(File, 8), Pos(1, 0)) << "start of second line";
-  EXPECT_THAT(offsetToPosition(File, 11), Pos(1, 3)) << "in second line";
-  EXPECT_THAT(offsetToPosition(File, 14), Pos(1, 6)) << "end of second line";
-  EXPECT_THAT(offsetToPosition(File, 15), Pos(1, 7)) << "second newline";
-  EXPECT_THAT(offsetToPosition(File, 16), Pos(2, 0)) << "start of last line";
-  EXPECT_THAT(offsetToPosition(File, 19), Pos(2, 3)) << "in last line";
-  EXPECT_THAT(offsetToPosition(File, 23), Pos(2, 7)) << "end of last line";
-  EXPECT_THAT(offsetToPosition(File, 24), Pos(2, 8)) << "EOF";
-  EXPECT_THAT(offsetToPosition(File, 25), Pos(2, 8)) << "out of bounds";
+  EXPECT_THAT(offsetToPosition(File, 12), Pos(1, 4)) << "before BMP char";
+  EXPECT_THAT(offsetToPosition(File, 13), Pos(1, 5)) << "in BMP char";
+  EXPECT_THAT(offsetToPosition(File, 15), Pos(1, 5)) << "after BMP char";
+  EXPECT_THAT(offsetToPosition(File, 16), Pos(1, 6)) << "end of second line";
+  EXPECT_THAT(offsetToPosition(File, 17), Pos(1, 7)) << "second newline";
+  EXPECT_THAT(offsetToPosition(File, 18), Pos(2, 0)) << "start of last line";
+  EXPECT_THAT(offsetToPosition(File, 21), Pos(2, 3)) << "in last line";
+  EXPECT_THAT(offsetToPosition(File, 22), Pos(2, 4)) << "before astral char";
+  EXPECT_THAT(offsetToPosition(File, 24), Pos(2, 6)) << "in astral char";
+  EXPECT_THAT(offsetToPosition(File, 26), Pos(2, 6)) << "after astral char";
+  EXPECT_THAT(offsetToPosition(File, 28), Pos(2, 8)) << "end of last line";
+  EXPECT_THAT(offsetToPosition(File, 29), Pos(2, 9)) << "EOF";
+  EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 9)) << "out of bounds";
 }
 
 } // namespace