Index: include/llvm/Support/DJB.h
===================================================================
--- include/llvm/Support/DJB.h
+++ include/llvm/Support/DJB.h
@@ -20,6 +20,10 @@
 
 /// The Bernstein hash function used by the DWARF accelerator tables.
 uint32_t djbHash(StringRef Buffer, uint32_t H = 5381);
+
+/// Computes the Bernstein hash after folding the input according to the Dwarf 5
+/// standard case folding rules.
+uint32_t caseFoldingDjbHash(StringRef Buffer, uint32_t H = 5381);
 } // namespace llvm
 
 #endif // LLVM_SUPPORT_DJB_H
Index: include/llvm/Support/Unicode.h
===================================================================
--- include/llvm/Support/Unicode.h
+++ include/llvm/Support/Unicode.h
@@ -60,6 +60,10 @@
 ///   * 1 for each of the remaining characters.
 int columnWidthUTF8(StringRef Text);
 
+/// Fold input unicode character according the the Simple unicode case folding
+/// rules.
+int foldCharSimple(int C);
+
 } // namespace unicode
 } // namespace sys
 } // namespace llvm
Index: lib/Support/CMakeLists.txt
===================================================================
--- lib/Support/CMakeLists.txt
+++ lib/Support/CMakeLists.txt
@@ -113,6 +113,7 @@
   Triple.cpp
   Twine.cpp
   Unicode.cpp
+  UnicodeCaseFold.cpp
   YAMLParser.cpp
   YAMLTraits.cpp
   raw_os_ostream.cpp
Index: lib/Support/DJB.cpp
===================================================================
--- lib/Support/DJB.cpp
+++ lib/Support/DJB.cpp
@@ -12,9 +12,85 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/DJB.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Unicode.h"
+
+using namespace llvm;
+
+static inline uint32_t djbHashChar(char C, uint32_t H) {
+  return (H << 5) + H + C;
+}
 
 uint32_t llvm::djbHash(StringRef Buffer, uint32_t H) {
   for (char C : Buffer.bytes())
-    H = ((H << 5) + H) + C;
+    H = djbHashChar(C, H);
+  return H;
+}
+
+static UTF32 chopOneUTF32(StringRef &Buffer) {
+  UTF32 C;
+  const UTF8 *const Begin8Const =
+      reinterpret_cast<const UTF8 *>(Buffer.begin());
+  const UTF8 *Begin8 = Begin8Const;
+  UTF32 *Begin32 = &C;
+
+  // In lenient mode we will always end up with a "reasonable" value in C for
+  // non-empty input.
+  assert(!Buffer.empty());
+  ConvertUTF8toUTF32(&Begin8, reinterpret_cast<const UTF8 *>(Buffer.end()),
+                     &Begin32, &C + 1, lenientConversion);
+  Buffer = Buffer.drop_front(Begin8 - Begin8Const);
+  return C;
+}
+
+static StringRef toUTF8(UTF32 C, MutableArrayRef<UTF8> Storage) {
+  const UTF32 *Begin32 = &C;
+  UTF8 *Begin8 = Storage.begin();
+
+  // The case-folded output should always be a valid unicode character, so use
+  // strict mode here.
+  ConversionResult CR = ConvertUTF32toUTF8(&Begin32, &C + 1, &Begin8,
+                                           Storage.end(), strictConversion);
+  assert(CR == conversionOK && "Case folding produced invalid char?");
+  (void)CR;
+  return StringRef(reinterpret_cast<char *>(Storage.begin()),
+                   Begin8 - Storage.begin());
+}
+
+static UTF32 foldCharDwarf(UTF32 C) {
+  // DWARF v5 addition to the unicode folding rules.
+  // Fold "Latin Small Letter Dotless I" and "Latin Capital Letter I With Dot
+  // Above" into "i".
+  if (C == 0x130 || C == 0x131)
+    return 'i';
+  return sys::unicode::foldCharSimple(C);
+}
+
+static uint32_t caseFoldingDjbHashCharSlow(StringRef &Buffer, uint32_t H) {
+  UTF32 C = chopOneUTF32(Buffer);
+
+  C = foldCharDwarf(C);
+
+  std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
+  StringRef Folded = toUTF8(C, Storage);
+  return djbHash(Folded, H);
+}
+
+uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) {
+  while (!Buffer.empty()) {
+    unsigned char C = Buffer.front();
+    if (LLVM_LIKELY(C <= 0x7f)) {
+      // US-ASCII, encoded as one character in utf-8.
+      // This is by far the most common case, so handle this specially.
+      if (C >= 'A' && C <= 'Z')
+        C = 'a' + (C - 'A'); // fold uppercase into lowercase
+      H = djbHashChar(C, H);
+      Buffer = Buffer.drop_front();
+      continue;
+    }
+    H = caseFoldingDjbHashCharSlow(Buffer, H);
+  }
   return H;
 }
Index: lib/Support/UnicodeCaseFold.cpp
===================================================================
--- /dev/null
+++ lib/Support/UnicodeCaseFold.cpp
@@ -0,0 +1,500 @@
+//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//
+//
+// This file was generated by utils/unicode-case-fold.py from the Unicode
+// case folding database at
+//    http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt
+//
+// To regenerate this file, run:
+//   utils/unicode-case-fold.py \
+//     "http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt" \
+//     > lib/Support/UnicodeCaseFold.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Unicode.h"
+
+int llvm::sys::unicode::foldCharSimple(int C) {
+  // 26 characters
+  if (C >= 0x0041 && C <= 0x005a && C % 1 == 0)
+    return C + 32;
+  // 23 characters
+  if (C >= 0x00c0 && C <= 0x00d6 && C % 1 == 0)
+    return C + 32;
+  // 7 characters
+  if (C >= 0x00d8 && C <= 0x00de && C % 1 == 0)
+    return C + 32;
+  // 24 characters
+  if (C >= 0x0100 && C <= 0x012e)
+    return C | 1;
+  // 3 characters
+  if (C >= 0x0132 && C <= 0x0136)
+    return C | 1;
+  // 8 characters
+  if (C >= 0x0139 && C <= 0x0147 && C % 2 == 1)
+    return C + 1;
+  // 23 characters
+  if (C >= 0x014a && C <= 0x0176)
+    return C | 1;
+  // 3 characters
+  if (C >= 0x0179 && C <= 0x017d && C % 2 == 1)
+    return C + 1;
+  // 3 characters
+  if (C >= 0x01a0 && C <= 0x01a4)
+    return C | 1;
+  // 9 characters
+  if (C >= 0x01cb && C <= 0x01db && C % 2 == 1)
+    return C + 1;
+  // 9 characters
+  if (C >= 0x01de && C <= 0x01ee)
+    return C | 1;
+  // 20 characters
+  if (C >= 0x01f8 && C <= 0x021e)
+    return C | 1;
+  // 9 characters
+  if (C >= 0x0222 && C <= 0x0232)
+    return C | 1;
+  // 5 characters
+  if (C >= 0x0246 && C <= 0x024e)
+    return C | 1;
+  // 3 characters
+  if (C >= 0x0388 && C <= 0x038a && C % 1 == 0)
+    return C + 37;
+  // 17 characters
+  if (C >= 0x0391 && C <= 0x03a1 && C % 1 == 0)
+    return C + 32;
+  // 9 characters
+  if (C >= 0x03a3 && C <= 0x03ab && C % 1 == 0)
+    return C + 32;
+  // 12 characters
+  if (C >= 0x03d8 && C <= 0x03ee)
+    return C | 1;
+  // 3 characters
+  if (C >= 0x03fd && C <= 0x03ff && C % 1 == 0)
+    return C + -130;
+  // 16 characters
+  if (C >= 0x0400 && C <= 0x040f && C % 1 == 0)
+    return C + 80;
+  // 32 characters
+  if (C >= 0x0410 && C <= 0x042f && C % 1 == 0)
+    return C + 32;
+  // 17 characters
+  if (C >= 0x0460 && C <= 0x0480)
+    return C | 1;
+  // 27 characters
+  if (C >= 0x048a && C <= 0x04be)
+    return C | 1;
+  // 7 characters
+  if (C >= 0x04c1 && C <= 0x04cd && C % 2 == 1)
+    return C + 1;
+  // 48 characters
+  if (C >= 0x04d0 && C <= 0x052e)
+    return C | 1;
+  // 38 characters
+  if (C >= 0x0531 && C <= 0x0556 && C % 1 == 0)
+    return C + 48;
+  // 38 characters
+  if (C >= 0x10a0 && C <= 0x10c5 && C % 1 == 0)
+    return C + 7264;
+  // 6 characters
+  if (C >= 0x13f8 && C <= 0x13fd && C % 1 == 0)
+    return C + -8;
+  // 75 characters
+  if (C >= 0x1e00 && C <= 0x1e94)
+    return C | 1;
+  // 48 characters
+  if (C >= 0x1ea0 && C <= 0x1efe)
+    return C | 1;
+  // 8 characters
+  if (C >= 0x1f08 && C <= 0x1f0f && C % 1 == 0)
+    return C + -8;
+  // 6 characters
+  if (C >= 0x1f18 && C <= 0x1f1d && C % 1 == 0)
+    return C + -8;
+  // 8 characters
+  if (C >= 0x1f28 && C <= 0x1f2f && C % 1 == 0)
+    return C + -8;
+  // 8 characters
+  if (C >= 0x1f38 && C <= 0x1f3f && C % 1 == 0)
+    return C + -8;
+  // 6 characters
+  if (C >= 0x1f48 && C <= 0x1f4d && C % 1 == 0)
+    return C + -8;
+  // 4 characters
+  if (C >= 0x1f59 && C <= 0x1f5f && C % 2 == 1)
+    return C + -8;
+  // 8 characters
+  if (C >= 0x1f68 && C <= 0x1f6f && C % 1 == 0)
+    return C + -8;
+  // 8 characters
+  if (C >= 0x1f88 && C <= 0x1f8f && C % 1 == 0)
+    return C + -8;
+  // 8 characters
+  if (C >= 0x1f98 && C <= 0x1f9f && C % 1 == 0)
+    return C + -8;
+  // 8 characters
+  if (C >= 0x1fa8 && C <= 0x1faf && C % 1 == 0)
+    return C + -8;
+  // 4 characters
+  if (C >= 0x1fc8 && C <= 0x1fcb && C % 1 == 0)
+    return C + -86;
+  // 16 characters
+  if (C >= 0x2160 && C <= 0x216f && C % 1 == 0)
+    return C + 16;
+  // 26 characters
+  if (C >= 0x24b6 && C <= 0x24cf && C % 1 == 0)
+    return C + 26;
+  // 47 characters
+  if (C >= 0x2c00 && C <= 0x2c2e && C % 1 == 0)
+    return C + 48;
+  // 3 characters
+  if (C >= 0x2c67 && C <= 0x2c6b && C % 2 == 1)
+    return C + 1;
+  // 50 characters
+  if (C >= 0x2c80 && C <= 0x2ce2)
+    return C | 1;
+  // 22 characters
+  if (C >= 0xa642 && C <= 0xa66c)
+    return C | 1;
+  // 14 characters
+  if (C >= 0xa680 && C <= 0xa69a)
+    return C | 1;
+  // 7 characters
+  if (C >= 0xa722 && C <= 0xa72e)
+    return C | 1;
+  // 31 characters
+  if (C >= 0xa732 && C <= 0xa76e)
+    return C | 1;
+  // 5 characters
+  if (C >= 0xa77e && C <= 0xa786)
+    return C | 1;
+  // 10 characters
+  if (C >= 0xa796 && C <= 0xa7a8)
+    return C | 1;
+  // 80 characters
+  if (C >= 0xab70 && C <= 0xabbf && C % 1 == 0)
+    return C + -38864;
+  // 26 characters
+  if (C >= 0xff21 && C <= 0xff3a && C % 1 == 0)
+    return C + 32;
+  // 40 characters
+  if (C >= 0x10400 && C <= 0x10427 && C % 1 == 0)
+    return C + 40;
+  // 36 characters
+  if (C >= 0x104b0 && C <= 0x104d3 && C % 1 == 0)
+    return C + 40;
+  // 51 characters
+  if (C >= 0x10c80 && C <= 0x10cb2 && C % 1 == 0)
+    return C + 64;
+  // 32 characters
+  if (C >= 0x118a0 && C <= 0x118bf && C % 1 == 0)
+    return C + 32;
+  // 34 characters
+  if (C >= 0x1e900 && C <= 0x1e921 && C % 1 == 0)
+    return C + 34;
+
+  switch (C) {
+  case 0x00b5: // MICRO SIGN
+    return 0x03bc;
+  case 0x0178: // LATIN CAPITAL LETTER Y WITH DIAERESIS
+    return 0x00ff;
+  case 0x017f: // LATIN SMALL LETTER LONG S
+    return 0x0073;
+  case 0x0181: // LATIN CAPITAL LETTER B WITH HOOK
+    return 0x0253;
+  case 0x0182: // LATIN CAPITAL LETTER B WITH TOPBAR
+    return 0x0183;
+  case 0x0184: // LATIN CAPITAL LETTER TONE SIX
+    return 0x0185;
+  case 0x0186: // LATIN CAPITAL LETTER OPEN O
+    return 0x0254;
+  case 0x0187: // LATIN CAPITAL LETTER C WITH HOOK
+    return 0x0188;
+  case 0x0189: // LATIN CAPITAL LETTER AFRICAN D
+    return 0x0256;
+  case 0x018a: // LATIN CAPITAL LETTER D WITH HOOK
+    return 0x0257;
+  case 0x018b: // LATIN CAPITAL LETTER D WITH TOPBAR
+    return 0x018c;
+  case 0x018e: // LATIN CAPITAL LETTER REVERSED E
+    return 0x01dd;
+  case 0x018f: // LATIN CAPITAL LETTER SCHWA
+    return 0x0259;
+  case 0x0190: // LATIN CAPITAL LETTER OPEN E
+    return 0x025b;
+  case 0x0191: // LATIN CAPITAL LETTER F WITH HOOK
+    return 0x0192;
+  case 0x0193: // LATIN CAPITAL LETTER G WITH HOOK
+    return 0x0260;
+  case 0x0194: // LATIN CAPITAL LETTER GAMMA
+    return 0x0263;
+  case 0x0196: // LATIN CAPITAL LETTER IOTA
+    return 0x0269;
+  case 0x0197: // LATIN CAPITAL LETTER I WITH STROKE
+    return 0x0268;
+  case 0x0198: // LATIN CAPITAL LETTER K WITH HOOK
+    return 0x0199;
+  case 0x019c: // LATIN CAPITAL LETTER TURNED M
+    return 0x026f;
+  case 0x019d: // LATIN CAPITAL LETTER N WITH LEFT HOOK
+    return 0x0272;
+  case 0x019f: // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+    return 0x0275;
+  case 0x01a6: // LATIN LETTER YR
+    return 0x0280;
+  case 0x01a7: // LATIN CAPITAL LETTER TONE TWO
+    return 0x01a8;
+  case 0x01a9: // LATIN CAPITAL LETTER ESH
+    return 0x0283;
+  case 0x01ac: // LATIN CAPITAL LETTER T WITH HOOK
+    return 0x01ad;
+  case 0x01ae: // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+    return 0x0288;
+  case 0x01af: // LATIN CAPITAL LETTER U WITH HORN
+    return 0x01b0;
+  case 0x01b1: // LATIN CAPITAL LETTER UPSILON
+    return 0x028a;
+  case 0x01b2: // LATIN CAPITAL LETTER V WITH HOOK
+    return 0x028b;
+  case 0x01b3: // LATIN CAPITAL LETTER Y WITH HOOK
+    return 0x01b4;
+  case 0x01b5: // LATIN CAPITAL LETTER Z WITH STROKE
+    return 0x01b6;
+  case 0x01b7: // LATIN CAPITAL LETTER EZH
+    return 0x0292;
+  case 0x01b8: // LATIN CAPITAL LETTER EZH REVERSED
+    return 0x01b9;
+  case 0x01bc: // LATIN CAPITAL LETTER TONE FIVE
+    return 0x01bd;
+  case 0x01c4: // LATIN CAPITAL LETTER DZ WITH CARON
+    return 0x01c6;
+  case 0x01c5: // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+    return 0x01c6;
+  case 0x01c7: // LATIN CAPITAL LETTER LJ
+    return 0x01c9;
+  case 0x01c8: // LATIN CAPITAL LETTER L WITH SMALL LETTER J
+    return 0x01c9;
+  case 0x01ca: // LATIN CAPITAL LETTER NJ
+    return 0x01cc;
+  case 0x01f1: // LATIN CAPITAL LETTER DZ
+    return 0x01f3;
+  case 0x01f2: // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+    return 0x01f3;
+  case 0x01f4: // LATIN CAPITAL LETTER G WITH ACUTE
+    return 0x01f5;
+  case 0x01f6: // LATIN CAPITAL LETTER HWAIR
+    return 0x0195;
+  case 0x01f7: // LATIN CAPITAL LETTER WYNN
+    return 0x01bf;
+  case 0x0220: // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+    return 0x019e;
+  case 0x023a: // LATIN CAPITAL LETTER A WITH STROKE
+    return 0x2c65;
+  case 0x023b: // LATIN CAPITAL LETTER C WITH STROKE
+    return 0x023c;
+  case 0x023d: // LATIN CAPITAL LETTER L WITH BAR
+    return 0x019a;
+  case 0x023e: // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+    return 0x2c66;
+  case 0x0241: // LATIN CAPITAL LETTER GLOTTAL STOP
+    return 0x0242;
+  case 0x0243: // LATIN CAPITAL LETTER B WITH STROKE
+    return 0x0180;
+  case 0x0244: // LATIN CAPITAL LETTER U BAR
+    return 0x0289;
+  case 0x0245: // LATIN CAPITAL LETTER TURNED V
+    return 0x028c;
+  case 0x0345: // COMBINING GREEK YPOGEGRAMMENI
+    return 0x03b9;
+  case 0x0370: // GREEK CAPITAL LETTER HETA
+    return 0x0371;
+  case 0x0372: // GREEK CAPITAL LETTER ARCHAIC SAMPI
+    return 0x0373;
+  case 0x0376: // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+    return 0x0377;
+  case 0x037f: // GREEK CAPITAL LETTER YOT
+    return 0x03f3;
+  case 0x0386: // GREEK CAPITAL LETTER ALPHA WITH TONOS
+    return 0x03ac;
+  case 0x038c: // GREEK CAPITAL LETTER OMICRON WITH TONOS
+    return 0x03cc;
+  case 0x038e: // GREEK CAPITAL LETTER UPSILON WITH TONOS
+    return 0x03cd;
+  case 0x038f: // GREEK CAPITAL LETTER OMEGA WITH TONOS
+    return 0x03ce;
+  case 0x03c2: // GREEK SMALL LETTER FINAL SIGMA
+    return 0x03c3;
+  case 0x03cf: // GREEK CAPITAL KAI SYMBOL
+    return 0x03d7;
+  case 0x03d0: // GREEK BETA SYMBOL
+    return 0x03b2;
+  case 0x03d1: // GREEK THETA SYMBOL
+    return 0x03b8;
+  case 0x03d5: // GREEK PHI SYMBOL
+    return 0x03c6;
+  case 0x03d6: // GREEK PI SYMBOL
+    return 0x03c0;
+  case 0x03f0: // GREEK KAPPA SYMBOL
+    return 0x03ba;
+  case 0x03f1: // GREEK RHO SYMBOL
+    return 0x03c1;
+  case 0x03f4: // GREEK CAPITAL THETA SYMBOL
+    return 0x03b8;
+  case 0x03f5: // GREEK LUNATE EPSILON SYMBOL
+    return 0x03b5;
+  case 0x03f7: // GREEK CAPITAL LETTER SHO
+    return 0x03f8;
+  case 0x03f9: // GREEK CAPITAL LUNATE SIGMA SYMBOL
+    return 0x03f2;
+  case 0x03fa: // GREEK CAPITAL LETTER SAN
+    return 0x03fb;
+  case 0x04c0: // CYRILLIC LETTER PALOCHKA
+    return 0x04cf;
+  case 0x10c7: // GEORGIAN CAPITAL LETTER YN
+    return 0x2d27;
+  case 0x10cd: // GEORGIAN CAPITAL LETTER AEN
+    return 0x2d2d;
+  case 0x1c80: // CYRILLIC SMALL LETTER ROUNDED VE
+    return 0x0432;
+  case 0x1c81: // CYRILLIC SMALL LETTER LONG-LEGGED DE
+    return 0x0434;
+  case 0x1c82: // CYRILLIC SMALL LETTER NARROW O
+    return 0x043e;
+  case 0x1c83: // CYRILLIC SMALL LETTER WIDE ES
+    return 0x0441;
+  case 0x1c84: // CYRILLIC SMALL LETTER TALL TE
+    return 0x0442;
+  case 0x1c85: // CYRILLIC SMALL LETTER THREE-LEGGED TE
+    return 0x0442;
+  case 0x1c86: // CYRILLIC SMALL LETTER TALL HARD SIGN
+    return 0x044a;
+  case 0x1c87: // CYRILLIC SMALL LETTER TALL YAT
+    return 0x0463;
+  case 0x1c88: // CYRILLIC SMALL LETTER UNBLENDED UK
+    return 0xa64b;
+  case 0x1e9b: // LATIN SMALL LETTER LONG S WITH DOT ABOVE
+    return 0x1e61;
+  case 0x1e9e: // LATIN CAPITAL LETTER SHARP S
+    return 0x00df;
+  case 0x1fb8: // GREEK CAPITAL LETTER ALPHA WITH VRACHY
+    return 0x1fb0;
+  case 0x1fb9: // GREEK CAPITAL LETTER ALPHA WITH MACRON
+    return 0x1fb1;
+  case 0x1fba: // GREEK CAPITAL LETTER ALPHA WITH VARIA
+    return 0x1f70;
+  case 0x1fbb: // GREEK CAPITAL LETTER ALPHA WITH OXIA
+    return 0x1f71;
+  case 0x1fbc: // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+    return 0x1fb3;
+  case 0x1fbe: // GREEK PROSGEGRAMMENI
+    return 0x03b9;
+  case 0x1fcc: // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+    return 0x1fc3;
+  case 0x1fd8: // GREEK CAPITAL LETTER IOTA WITH VRACHY
+    return 0x1fd0;
+  case 0x1fd9: // GREEK CAPITAL LETTER IOTA WITH MACRON
+    return 0x1fd1;
+  case 0x1fda: // GREEK CAPITAL LETTER IOTA WITH VARIA
+    return 0x1f76;
+  case 0x1fdb: // GREEK CAPITAL LETTER IOTA WITH OXIA
+    return 0x1f77;
+  case 0x1fe8: // GREEK CAPITAL LETTER UPSILON WITH VRACHY
+    return 0x1fe0;
+  case 0x1fe9: // GREEK CAPITAL LETTER UPSILON WITH MACRON
+    return 0x1fe1;
+  case 0x1fea: // GREEK CAPITAL LETTER UPSILON WITH VARIA
+    return 0x1f7a;
+  case 0x1feb: // GREEK CAPITAL LETTER UPSILON WITH OXIA
+    return 0x1f7b;
+  case 0x1fec: // GREEK CAPITAL LETTER RHO WITH DASIA
+    return 0x1fe5;
+  case 0x1ff8: // GREEK CAPITAL LETTER OMICRON WITH VARIA
+    return 0x1f78;
+  case 0x1ff9: // GREEK CAPITAL LETTER OMICRON WITH OXIA
+    return 0x1f79;
+  case 0x1ffa: // GREEK CAPITAL LETTER OMEGA WITH VARIA
+    return 0x1f7c;
+  case 0x1ffb: // GREEK CAPITAL LETTER OMEGA WITH OXIA
+    return 0x1f7d;
+  case 0x1ffc: // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+    return 0x1ff3;
+  case 0x2126: // OHM SIGN
+    return 0x03c9;
+  case 0x212a: // KELVIN SIGN
+    return 0x006b;
+  case 0x212b: // ANGSTROM SIGN
+    return 0x00e5;
+  case 0x2132: // TURNED CAPITAL F
+    return 0x214e;
+  case 0x2183: // ROMAN NUMERAL REVERSED ONE HUNDRED
+    return 0x2184;
+  case 0x2c60: // LATIN CAPITAL LETTER L WITH DOUBLE BAR
+    return 0x2c61;
+  case 0x2c62: // LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+    return 0x026b;
+  case 0x2c63: // LATIN CAPITAL LETTER P WITH STROKE
+    return 0x1d7d;
+  case 0x2c64: // LATIN CAPITAL LETTER R WITH TAIL
+    return 0x027d;
+  case 0x2c6d: // LATIN CAPITAL LETTER ALPHA
+    return 0x0251;
+  case 0x2c6e: // LATIN CAPITAL LETTER M WITH HOOK
+    return 0x0271;
+  case 0x2c6f: // LATIN CAPITAL LETTER TURNED A
+    return 0x0250;
+  case 0x2c70: // LATIN CAPITAL LETTER TURNED ALPHA
+    return 0x0252;
+  case 0x2c72: // LATIN CAPITAL LETTER W WITH HOOK
+    return 0x2c73;
+  case 0x2c75: // LATIN CAPITAL LETTER HALF H
+    return 0x2c76;
+  case 0x2c7e: // LATIN CAPITAL LETTER S WITH SWASH TAIL
+    return 0x023f;
+  case 0x2c7f: // LATIN CAPITAL LETTER Z WITH SWASH TAIL
+    return 0x0240;
+  case 0x2ceb: // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+    return 0x2cec;
+  case 0x2ced: // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+    return 0x2cee;
+  case 0x2cf2: // COPTIC CAPITAL LETTER BOHAIRIC KHEI
+    return 0x2cf3;
+  case 0xa640: // CYRILLIC CAPITAL LETTER ZEMLYA
+    return 0xa641;
+  case 0xa779: // LATIN CAPITAL LETTER INSULAR D
+    return 0xa77a;
+  case 0xa77b: // LATIN CAPITAL LETTER INSULAR F
+    return 0xa77c;
+  case 0xa77d: // LATIN CAPITAL LETTER INSULAR G
+    return 0x1d79;
+  case 0xa78b: // LATIN CAPITAL LETTER SALTILLO
+    return 0xa78c;
+  case 0xa78d: // LATIN CAPITAL LETTER TURNED H
+    return 0x0265;
+  case 0xa790: // LATIN CAPITAL LETTER N WITH DESCENDER
+    return 0xa791;
+  case 0xa792: // LATIN CAPITAL LETTER C WITH BAR
+    return 0xa793;
+  case 0xa7aa: // LATIN CAPITAL LETTER H WITH HOOK
+    return 0x0266;
+  case 0xa7ab: // LATIN CAPITAL LETTER REVERSED OPEN E
+    return 0x025c;
+  case 0xa7ac: // LATIN CAPITAL LETTER SCRIPT G
+    return 0x0261;
+  case 0xa7ad: // LATIN CAPITAL LETTER L WITH BELT
+    return 0x026c;
+  case 0xa7ae: // LATIN CAPITAL LETTER SMALL CAPITAL I
+    return 0x026a;
+  case 0xa7b0: // LATIN CAPITAL LETTER TURNED K
+    return 0x029e;
+  case 0xa7b1: // LATIN CAPITAL LETTER TURNED T
+    return 0x0287;
+  case 0xa7b2: // LATIN CAPITAL LETTER J WITH CROSSED-TAIL
+    return 0x029d;
+  case 0xa7b3: // LATIN CAPITAL LETTER CHI
+    return 0xab53;
+  case 0xa7b4: // LATIN CAPITAL LETTER BETA
+    return 0xa7b5;
+  case 0xa7b6: // LATIN CAPITAL LETTER OMEGA
+    return 0xa7b7;
+  }
+  return C;
+}
Index: unittests/Support/CMakeLists.txt
===================================================================
--- unittests/Support/CMakeLists.txt
+++ unittests/Support/CMakeLists.txt
@@ -19,6 +19,7 @@
   ConvertUTFTest.cpp
   DataExtractorTest.cpp
   DebugTest.cpp
+  DJBTest.cpp
   EndianStreamTest.cpp
   EndianTest.cpp
   ErrnoTest.cpp
Index: unittests/Support/DJBTest.cpp
===================================================================
--- /dev/null
+++ unittests/Support/DJBTest.cpp
@@ -0,0 +1,89 @@
+//===---------- llvm/unittest/Support/DJBTest.cpp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DJB.h"
+#include "llvm/ADT/Twine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+TEST(DJBTest, caseFolding) {
+  struct TestCase {
+    StringLiteral One;
+    StringLiteral Two;
+  };
+
+  static constexpr TestCase Tests[] = {
+      {"ASDF", "asdf"},
+      {"qWeR", "QwEr"},
+      {"qqqqqqqqqqqqqqqqqqqq", "QQQQQQQQQQQQQQQQQQQQ"},
+
+      {"I", "i"},
+      // Latin Small Letter Dotless I
+      {u8"\u0130", "i"},
+      // Latin Capital Letter I With Dot Above
+      {u8"\u0131", "i"},
+
+      // Latin Capital Letter A With Grave
+      {u8"\u00c0", u8"\u00e0"},
+      // Latin Capital Letter A With Macron
+      {u8"\u0100", u8"\u0101"},
+      // Latin Capital Letter L With Acute
+      {u8"\u0139", u8"\u013a"},
+      // Cyrillic Capital Letter Ie
+      {u8"\u0415", u8"\u0435"},
+      // Latin Capital Letter A With Circumflex And Grave
+      {u8"\u1ea6", u8"\u1ea7"},
+      // Kelvin Sign
+      {u8"\u212a", u8"\u006b"},
+      // Glagolitic Capital Letter Chrivi
+      {u8"\u2c1d", u8"\u2c4d"},
+      // Fullwidth Latin Capital Letter M
+      {u8"\uff2d", u8"\uff4d"},
+      // Old Hungarian Capital Letter Ej
+      {u8"\U00010c92", u8"\U00010cd2"},
+  };
+
+  for (const TestCase &T : Tests) {
+    SCOPED_TRACE("Comparing '" + T.One + "' and '" + T.Two + "'");
+    EXPECT_EQ(caseFoldingDjbHash(T.One), caseFoldingDjbHash(T.Two));
+  }
+}
+
+TEST(DJBTest, knownValuesLowerCase) {
+  struct TestCase {
+    StringLiteral Text;
+    uint32_t Hash;
+  };
+  static constexpr TestCase Tests[] = {
+      {"", 5381u},
+      {"f", 177675u},
+      {"fo", 5863386u},
+      {"foo", 193491849u},
+      {"foob", 2090263819u},
+      {"fooba", 259229388u},
+      {"foobar", 4259602622u},
+      {"pneumonoultramicroscopicsilicovolcanoconiosis", 3999417781u},
+  };
+
+  for (const TestCase &T: Tests) {
+    SCOPED_TRACE("Text: '" + T.Text + "'");
+    EXPECT_EQ(T.Hash, djbHash(T.Text));
+    EXPECT_EQ(T.Hash, caseFoldingDjbHash(T.Text));
+    EXPECT_EQ(T.Hash, caseFoldingDjbHash(T.Text.upper()));
+  }
+}
+
+TEST(DJBTest, knownValuesUnicode) {
+  EXPECT_EQ(
+      2326183139u,
+      caseFoldingDjbHash(
+          u8"\u0130\u0131\u00c0\u00e0\u0100\u0101\u0139\u013a\u0415\u0435\u1ea6"
+          u8"\u1ea7\u212a\u006b\u2c1d\u2c4d\uff2d\uff4d\U00010c92\U00010cd2"));
+}
Index: utils/unicode-case-fold.py
===================================================================
--- /dev/null
+++ utils/unicode-case-fold.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+"""
+Unicode case folding database conversion utility
+
+Parses the database and generates a C++ function which implements the case
+folding algorithm. The database entries are of the form:
+
+  <code>; <status>; <mapping>; # <name>
+
+<status> can be one of four characters:
+  C - Common mappings
+  S - mappings for Simple case folding
+  F - mappings for Full case folding
+  T - special case for Turkish I characters
+
+Right now this generates a function which implements simple case folding (C+S
+entries).
+"""
+
+import sys
+import re
+import urllib2
+
+# This variable will contain the code to handle special-cased mappings.
+special_cases = ""
+
+# This variable will containg the code to handle blocks of mappings.
+blocks = ""
+
+# Reads file line-by-line, extracts Common and Simple case fold mappings and
+# returns a (from_char, to_char, from_name) tuple.
+def mappings(f):
+    expr = re.compile(r'^(.*); [CS]; (.*); # (.*)')
+    for line in f:
+        m = expr.match(line)
+        if not m: continue
+        from_char = int(m.group(1), 16)
+        to_char = int(m.group(2), 16)
+        from_name = m.group(3)
+        yield from_char, to_char, from_name
+
+# Computes the shift (to_char - from_char) in a mapping.
+def shift(mapping):
+    return mapping[1] - mapping[0]
+
+# Computes the stride (from_char2 - from_char1) of two mappings.
+def stride2(mapping1, mapping2):
+    return mapping2[0] - mapping1[0]
+
+# Computes the stride of a list of mappings. The list should have at least two
+# mappings. All mappings in the list are assumed to have the same stride.
+def stride(block):
+    return stride2(block[0], block[1])
+
+# Generates a special-case code for the mapping.
+def special_case(m):
+    global special_cases
+    special_cases += "  case {0:#06x}: // {2}\n    return {1:#06x};\n".format(*m)
+
+
+# b is a list of mappings. All the mappings are assumed to have the same
+# shift and the stride between adjecant mappings (if any) is constant.
+def dump_block(b):
+    global blocks
+    if len(b) < 3:
+        # don't bother trying to condense short blocks
+        for m in b:
+            special_case(m)
+        return
+
+    blocks += "  // {0} characters\n".format(len(b))
+    first = b[0][0]
+    last = first + stride(b) * (len(b)-1)
+    modulo = first % stride(b)
+    if stride(b) == 2 and shift(b[0]) == 1 and b[0][0]%2 == 0:
+        # Generate slightly more efficient code here.
+        # We can elide the modulo-check because the expression "C|1" will map
+        # the intervening characters to themselves.
+        pattern = "  if (C >= {0:#06x} && C <= {1:#06x})\n    return C | 1;\n"
+        blocks += pattern.format(first, last)
+        return
+
+    pattern = "  if (C >= {0:#06x} && C <= {1:#06x} && C % {2} == {3})\n    return C + {4};\n"
+    blocks += pattern.format(first, last, stride(b), modulo, shift(b[0]))
+
+current_block = []
+f = urllib2.urlopen(sys.argv[1])
+for m in mappings(f):
+    if len(current_block) == 0:
+        current_block.append(m)
+        continue
+
+    if shift(current_block[0]) != shift(m):
+        # Incompatible shift, start a new block.
+        dump_block(current_block)
+        current_block = [m]
+        continue
+
+    if len(current_block) == 1 or stride(current_block) == stride2(current_block[-1], m):
+        current_block.append(m)
+        continue
+
+    # Incompatible stride, start a new block.
+    dump_block(current_block)
+    current_block = [m]
+f.close()
+
+dump_block(current_block)
+
+print '//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//'
+print '//'
+print '// This file was generated by utils/unicode-case-fold.py from the Unicode'
+print '// case folding database at'
+print '//   ', sys.argv[1]
+print '//'
+print '// To regenerate this file, run:'
+print '//   utils/unicode-case-fold.py \\'
+print '//     "{}" \\'.format(sys.argv[1])
+print '//     > lib/Support/UnicodeCaseFold.cpp'
+print '//'
+print '//===----------------------------------------------------------------------===//'
+print ''
+print '#include "llvm/Support/Unicode.h"'
+print ''
+print "int llvm::sys::unicode::foldCharSimple(int C) {"
+print blocks
+print "  switch (C) {"
+print special_cases.rstrip()
+print "  }"
+print "  return C;"
+print "}"