Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1851,6 +1851,69 @@
     uselistorder i32 (i32) @bar, { 1, 0 }
     uselistorder_bb @foo, %bb, { 5, 1, 3, 2, 0, 4 }
 
+.. _bitsets:
+
+Bitsets
+=======
+
+This is a mechanism that allows IR modules to co-operatively build pointer
+sets corresponding to addresses within a given set of globals. One example
+of a use case for this is to allow a C++ program to efficiently verify (at
+each call site) that a vtable pointer is in the set of valid vtable pointers
+for the type of the class or its derived classes.
+
+To use the mechanism, a client creates a global metadata node named
+``llvm.bitsets``.  Each element is a metadata node with two elements:
+the first is a metadata string containing an identifier for the bitset,
+and the second is the (potentially offset) address of a global variable.
+
+This will cause a link-time optimization pass to generate bitsets from the
+memory addresses referenced from the elements of the bitset metadata. The pass
+will lay out the referenced globals consecutively, so their definitions must
+be available at LTO time. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`,
+generates code to test whether a given pointer is a member of a bitset.
+
+:Example:
+
+::
+
+    target datalayout = "e-p:32:32"
+
+    @a = internal global i32 0
+    @b = internal global i32 0
+    @c = internal global i32 0
+
+    !llvm.bitsets = !{!0, !1, !2, !3}
+
+    !0 = !{!"bitset1", i32* @a}
+    !1 = !{!"bitset1", i32* @b}
+    !2 = !{!"bitset2", i32* @b}
+    !3 = !{!"bitset2", i32* @c}
+
+    declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+    define i1 @foo(i32* %p) {
+      %pi8 = bitcast i32* %p to i8*
+      %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+      ret i1 %x
+    }
+
+    define i1 @bar(i32* %p) {
+      %pi8 = bitcast i32* %p to i8*
+      %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
+      ret i1 %x
+    }
+
+    define void @main() {
+      %a1 = call i1 @foo(i32* @a) ; returns 1
+      %b1 = call i1 @foo(i32* @b) ; returns 1
+      %c1 = call i1 @foo(i32* @c) ; returns 0
+      %a2 = call i1 @bar(i32* @a) ; returns 0
+      %b2 = call i1 @bar(i32* @b) ; returns 1
+      %c2 = call i1 @bar(i32* @c) ; returns 1
+      ret void
+    }
+
 .. _typesystem:
 
 Type System
@@ -9888,6 +9951,31 @@
 that the optimizer can otherwise deduce or facts that are of little use to the
 optimizer.
 
+.. _bitset.test:
+
+'``llvm.bitset.test``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to be tested. The second argument is a
+metadata string containing the name of a :ref:`bitset <bitsets>`.
+
+Overview:
+"""""""""
+
+The ``llvm.bitset.test`` intrinsic tests whether the given pointer is a
+member of the given bitset.
+
 '``llvm.donothing``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
Index: include/llvm/ADT/EquivalenceClasses.h
===================================================================
--- include/llvm/ADT/EquivalenceClasses.h
+++ include/llvm/ADT/EquivalenceClasses.h
@@ -254,7 +254,7 @@
       assert(Node != nullptr && "Dereferencing end()!");
       return Node->getData();
     }
-    reference operator->() const { return operator*(); }
+    pointer operator->() const { return &operator*(); }
 
     member_iterator &operator++() {
       assert(Node != nullptr && "++'d off the end of the list!");
Index: include/llvm/ADT/PointerUnion.h
===================================================================
--- include/llvm/ADT/PointerUnion.h
+++ include/llvm/ADT/PointerUnion.h
@@ -195,6 +195,12 @@
     return lhs.getOpaqueValue() != rhs.getOpaqueValue();
   }
 
+  template<typename PT1, typename PT2>
+  static bool operator<(PointerUnion<PT1, PT2> lhs,
+                        PointerUnion<PT1, PT2> rhs) {
+    return lhs.getOpaqueValue() < rhs.getOpaqueValue();
+  }
+
   // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has
   // # low bits available = min(PT1bits,PT2bits)-1.
   template<typename PT1, typename PT2>
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -584,6 +584,11 @@
                                  [LLVMPointerTo<0>, llvm_i32_ty,
                                   LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
                                  [IntrReadArgMem]>;
+
+// Intrinsics to support bit sets.
+def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
+                                [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -178,6 +178,7 @@
 void initializeLoopUnswitchPass(PassRegistry&);
 void initializeLoopIdiomRecognizePass(PassRegistry&);
 void initializeLowerAtomicPass(PassRegistry&);
+void initializeLowerBitSetsPass(PassRegistry&);
 void initializeLowerExpectIntrinsicPass(PassRegistry&);
 void initializeLowerIntrinsicsPass(PassRegistry&);
 void initializeLowerInvokePass(PassRegistry&);
Index: include/llvm/Transforms/IPO.h
===================================================================
--- include/llvm/Transforms/IPO.h
+++ include/llvm/Transforms/IPO.h
@@ -199,6 +199,10 @@
 /// manager.
 ModulePass *createBarrierNoopPass();
 
+/// \brief This pass lowers bitset metadata and the llvm.bitset.test intrinsic
+/// to bitsets.
+ModulePass *createLowerBitSetsPass();
+
 } // End llvm namespace
 
 #endif
Index: lib/Transforms/IPO/CMakeLists.txt
===================================================================
--- lib/Transforms/IPO/CMakeLists.txt
+++ lib/Transforms/IPO/CMakeLists.txt
@@ -14,6 +14,7 @@
   Inliner.cpp
   Internalize.cpp
   LoopExtractor.cpp
+  LowerBitSets.cpp
   MergeFunctions.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
Index: lib/Transforms/IPO/IPO.cpp
===================================================================
--- lib/Transforms/IPO/IPO.cpp
+++ lib/Transforms/IPO/IPO.cpp
@@ -36,6 +36,7 @@
   initializeLoopExtractorPass(Registry);
   initializeBlockExtractorPassPass(Registry);
   initializeSingleLoopExtractorPass(Registry);
+  initializeLowerBitSetsPass(Registry);
   initializeMergeFunctionsPass(Registry);
   initializePartialInlinerPass(Registry);
   initializePruneEHPass(Registry);
Index: lib/Transforms/IPO/LowerBitSets.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,351 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+
+struct LowerBitSets : public ModulePass {
+  static char ID;
+  LowerBitSets() : ModulePass(ID) {
+    initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool buildBitSets(Module &M, const DataLayout *DL);
+  bool eraseBitSetMetadata(Module &M);
+
+  bool runOnModule(Module &M) override;
+};
+
+}
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::buildBitSets(Module &M, const DataLayout *DL) {
+  Function *BitSetTestFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+  if (!BitSetTestFunc)
+    return false;
+
+  // Equivalence class set containing bitsets and the globals they reference.
+  // This is used to partition the set of bitsets in the module into disjoint
+  // sets.
+  typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+      GlobalClassesTy;
+  GlobalClassesTy GlobalClasses;
+
+  // Mapping from bitset mdstrings to the call sites that test them.
+  DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+  NamedMDNode *BitSetNM = M.getNamedMetadata("llvm.bitsets");
+
+  for (const Use &U : BitSetTestFunc->uses()) {
+    auto CI = dyn_cast<CallInst>(U.getUser());
+    if (!CI)
+      continue;
+
+    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+      report_fatal_error(
+          "Second argument of llvm.bitset.test must be metadata string");
+    auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+    std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+              bool> Ins =
+        BitSetTestCallSites.insert(
+            std::make_pair(BitSet, std::vector<CallInst *>()));
+    Ins.first->second.push_back(CI);
+    if (!Ins.second)
+      continue;
+
+    GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+    GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+    if (!BitSetNM)
+      continue;
+
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getNumOperands() != 2)
+        report_fatal_error(
+            "All operands of llvm.bitsets metadata must have 2 elements");
+
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+
+      auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+      if (!OpConstMD)
+        report_fatal_error("Bit set element must be a constant");
+      auto OpGlobal = dyn_cast<GlobalVariable>(
+          OpConstMD->getValue()->stripInBoundsOffsets());
+      if (!OpGlobal)
+        report_fatal_error("Bit set element must refer to global");
+
+      CurSet = GlobalClasses.unionSets(
+          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+    }
+  }
+
+  if (GlobalClasses.empty())
+    return false;
+
+  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+                                 E = GlobalClasses.end();
+       I != E; ++I) {
+    if (!I->isLeader()) continue;
+
+    // Build the list of bitsets and referenced globals in this disjoint set.
+    std::vector<MDString *> BitSets;
+    std::vector<GlobalVariable *> Globals;
+    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+         MI != GlobalClasses.member_end(); ++MI) {
+      if ((*MI).is<MDString *>())
+        BitSets.push_back(MI->get<MDString *>());
+      else
+        Globals.push_back(MI->get<GlobalVariable *>());
+    }
+
+    // Order bitsets and globals by name for determinism. TODO: We may later
+    // want to use a more sophisticated ordering that lays out globals so as to
+    // minimize the sizes of the bitsets.
+    std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+      return S1->getString() < S2->getString();
+    });
+    std::sort(Globals.begin(), Globals.end(),
+              [](GlobalVariable *GV1, GlobalVariable *GV2) {
+                return GV1->getName() < GV2->getName();
+              });
+
+    // Build a new global with the combined contents of the referenced globals.
+    std::vector<Constant *> GlobalInits;
+    for (GlobalVariable *G : Globals)
+      GlobalInits.push_back(G->getInitializer());
+    Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
+    auto NewGlobal =
+        new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
+                           GlobalValue::PrivateLinkage, NewInit);
+
+    const StructLayout *NewGlobalLayout =
+        DL->getStructLayout(cast<StructType>(NewInit->getType()));
+
+    // Compute the offsets of the original globals within the new global.
+    DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+    for (unsigned I = 0; I != Globals.size(); ++I) {
+      GlobalLayout[Globals[I]] = NewGlobalLayout->getElementOffset(I);
+    }
+
+    Type *Int1Ty = Type::getInt1Ty(M.getContext());
+    Type *Int8Ty = Type::getInt8Ty(M.getContext());
+    Type *Int32Ty = Type::getInt32Ty(M.getContext());
+    Type *IntPtrTy = DL->getIntPtrType(M.getContext(), 0);
+
+    // For each bitset in this disjoint set...
+    for (MDString *BS : BitSets) {
+      std::vector<uint64_t> Offsets;
+      uint64_t Min = ~0ULL, Max = 0; // ~0ULL: 64-bit all-ones even where 'long' is 32 bits
+
+      // Compute the byte offset of each element of this bitset.
+      if (BitSetNM) {
+        for (MDNode *Op : BitSetNM->operands()) {
+          if (Op->getOperand(0) != BS || !Op->getOperand(1))
+            continue;
+          Constant *Ptr = // operands were validated above; cast<> asserts instead of UB on null
+              cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
+
+          // TODO: Handle other kinds of constants here.
+          uint64_t Offset = 0;
+          if (auto GEP = dyn_cast<GEPOperator>(Ptr)) {
+            APInt APOffset(DL->getPointerSizeInBits(0), 0);
+            bool Result = GEP->accumulateConstantOffset(*DL, APOffset);
+            (void) Result;
+            assert(Result);
+            Offset = APOffset.getZExtValue();
+          }
+
+          auto OpGlobal = cast<GlobalVariable>(Ptr->stripInBoundsOffsets());
+          Offset += GlobalLayout[OpGlobal];
+
+          if (Min > Offset)
+            Min = Offset;
+          if (Max < Offset)
+            Max = Offset;
+
+          Offsets.push_back(Offset);
+        }
+      }
+
+      if (Min > Max)
+        Min = 0;
+
+      // Normalize each offset against the minimum observed offset, and compute
+      // the bitwise OR of each of the offsets. The number of trailing zeros
+      // in the mask gives us the log2 of the alignment of all offsets, which
+      // allows us to compress the bitset by only storing one bit per aligned
+      // address.
+      uint64_t Mask = 0;
+      for (uint64_t &Offset : Offsets) {
+        Offset -= Min;
+        Mask |= Offset;
+      }
+
+      unsigned AlignPow2 = 0;
+      // FIXME: Can probably do something smarter if all offsets are 0.
+      if (Mask != 0)
+        AlignPow2 = countTrailingZeros(Mask, ZB_Undefined);
+
+      // Build the compressed bitset while normalizing the offsets against the
+      // computed alignment.
+      uint64_t BitSize = ((Max - Min) >> AlignPow2) + 1;
+      uint64_t ByteSize = (BitSize + 7) / 8;
+      std::vector<uint8_t> Bits(ByteSize);
+      for (uint64_t Offset : Offsets) {
+        Offset >>= AlignPow2;
+        Bits[Offset / 8] |= 1 << (Offset % 8);
+      }
+
+      Constant *BitsConst = ConstantDataArray::get(M.getContext(), Bits);
+      auto BitSetGlobal = new GlobalVariable(
+          M, BitsConst->getType(), /*isConstant=*/true,
+          GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits");
+
+      Constant *GlobalAsInt = ConstantExpr::getPtrToInt(NewGlobal, IntPtrTy);
+      Constant *GlobalMinAsInt =
+          ConstantExpr::getAdd(GlobalAsInt, ConstantInt::get(IntPtrTy, Min));
+      Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BitSize);
+
+      // Generate code to test the bitset for each call to llvm.bitset.test for
+      // this bitset.
+      for (CallInst *CI : BitSetTestCallSites[BS]) {
+        Value *Ptr = CI->getArgOperand(0);
+        BasicBlock *InitialBB = CI->getParent();
+
+        IRBuilder<> B(CI);
+
+        Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+        Value *PtrOffset = B.CreateSub(PtrAsInt, GlobalMinAsInt);
+
+        Value *BitOffset;
+        if (AlignPow2 == 0) {
+          BitOffset = PtrOffset;
+        } else {
+          // We need to check that the offset both falls within our range
+          // and is suitably aligned. We can check both properties at the
+          // same time by performing a right rotate by log2(alignment)
+          // followed by an integer comparison against the bitset size. The
+          // rotate will move the lower order bits that need to be zero into
+          // the higher order bits of the result, causing the comparison to
+          // fail if they are nonzero. The rotate also conveniently gives
+          // us a bit offset to use during the load from the bitset.
+          Value *OffsetSHR =
+              B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, AlignPow2));
+          Value *OffsetSHL = B.CreateShl(
+              PtrOffset,
+              ConstantInt::get(IntPtrTy,
+                               DL->getPointerSizeInBits(0) - AlignPow2));
+          BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+        }
+
+        Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+        TerminatorInst *Term =
+            SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+        IRBuilder<> ThenB(Term);
+
+        // Now that we know that the offset is in range and aligned, load
+        // the appropriate bit from the bitset. TODO: We might want to use the
+        // bt instruction with the previously computed bit offset; LLVM does
+        // not currently expose this instruction.
+        Value *BitSetGlobalOffset = ThenB.CreateLShr(
+            PtrOffset, ConstantInt::get(IntPtrTy, AlignPow2 + 3));
+        Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), BitSetGlobalOffset};
+        Value *BitSetGlobalAddr = ThenB.CreateGEP(BitSetGlobal, Idxs);
+        Value *BitSetGlobalVal = ThenB.CreateLoad(BitSetGlobalAddr);
+
+        Value *BitSetShift = ThenB.CreateTrunc(BitOffset, Int8Ty);
+        BitSetShift = ThenB.CreateAnd(BitSetShift, ConstantInt::get(Int8Ty, 7));
+
+        Value *Bit = ThenB.CreateLShr(BitSetGlobalVal, BitSetShift);
+        Bit = ThenB.CreateTrunc(Bit, Int1Ty);
+
+        // The value we want is 0 if we came directly from the initial block
+        // (having failed the range or alignment checks), or the loaded bit if
+        // we came from the block in which we loaded it.
+        B.SetInsertPoint(CI);
+        PHINode *P = B.CreatePHI(Int1Ty, 2);
+        P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+        P->addIncoming(Bit, ThenB.GetInsertBlock());
+
+        CI->replaceAllUsesWith(P);
+        CI->eraseFromParent();
+      }
+    }
+
+    // Build aliases pointing to offsets into the combined global for each
+    // global from which we built the combined global, and replace references
+    // to the original globals with references to the aliases.
+    for (unsigned I = 0; I != Globals.size(); ++I) {
+      Constant *NewGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+                                   ConstantInt::get(Int32Ty, I)};
+      Constant *NewGlobalElemPtr =
+          ConstantExpr::getGetElementPtr(NewGlobal, NewGlobalIdxs);
+      GlobalAlias *GAlias = GlobalAlias::create(
+          Globals[I]->getType()->getElementType(),
+          Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
+          "", NewGlobalElemPtr, &M);
+      GAlias->takeName(Globals[I]);
+      Globals[I]->replaceAllUsesWith(GAlias);
+      Globals[I]->eraseFromParent();
+    }
+  }
+
+  return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata(Module &M) {
+  NamedMDNode *MD = M.getNamedMetadata("llvm.bitsets");
+  if (!MD)
+    return false;
+
+  M.eraseNamedMetadata(MD);
+  return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+  const DataLayout *DL = M.getDataLayout();
+  if (!DL)
+    return false;
+
+  bool Changed = buildBitSets(M, DL);
+  Changed |= eraseBitSetMetadata(M);
+  return Changed;
+}
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -476,6 +476,9 @@
   // currently it damages debug info.
   if (MergeFunctions)
     PM.add(createMergeFunctionsPass());
+
+  // Lower bitset constants to bitsets.
+  PM.add(createLowerBitSetsPass());
 }
 
 void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
Index: test/Transforms/LowerBitSets/simple.ll
===================================================================
--- /dev/null
+++ test/Transforms/LowerBitSets/simple.ll
@@ -0,0 +1,119 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK: [[G:@[^ ]*]] = private constant { i32, i32, i32, [2 x i32] } { i32 1, i32 2, i32 3, [2 x i32] [i32 4, i32 5] }
+@a = internal constant i32 1
+@b = internal constant i32 2
+@c = internal constant i32 3
+@d = internal constant [2 x i32] [i32 4, i32 5]
+
+; Offset 0, 4 byte alignment
+; CHECK: @bitset1.bits = private constant [1 x i8] c"\13"
+!0 = !{!"bitset1", i32* @a }
+!1 = !{!"bitset1", i32* @b }
+!2 = !{!"bitset1", i32* getelementptr ([2 x i32]* @d, i32 0, i32 1) }
+
+; Offset 4, 4 byte alignment
+; CHECK: @bitset2.bits = private constant [1 x i8] c"\03"
+!3 = !{!"bitset2", i32* @b }
+!4 = !{!"bitset2", i32* @c }
+
+; Offset 0, 8 byte alignment
+; CHECK: @bitset3.bits = private constant [1 x i8] c"\03"
+!5 = !{!"bitset3", i32* @a }
+!6 = !{!"bitset3", i32* @c }
+
+; Entries whose second operand is null (the result of a global being DCE'd)
+; should be ignored.
+!7 = !{!"bitset2", null }
+
+!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: @foo(i32* [[A0:%[^ ]*]])
+define i1 @foo(i32* %p) {
+  ; CHECK-NOT: llvm.bitset.test
+
+  ; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32
+  ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2
+  ; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30
+  ; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]
+  ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 5
+  ; CHECK: br i1 [[R6]]
+
+  ; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R2]], 5
+  ; CHECK: [[R9:%[^ ]*]] = getelementptr [1 x i8]* @bitset1.bits, i32 0, i32 [[R8]]
+  ; CHECK: [[R10:%[^ ]*]] = load i8* [[R9]]
+  ; CHECK: [[R11:%[^ ]*]] = trunc i32 [[R5]] to i8
+  ; CHECK: [[R12:%[^ ]*]] = and i8 [[R11]], 7
+  ; CHECK: [[R13:%[^ ]*]] = lshr i8 [[R10]], [[R12]]
+  ; CHECK: [[R14:%[^ ]*]] = trunc i8 [[R13]] to i1
+
+  ; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R14]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+
+  ; CHECK-NOT: llvm.bitset.test
+  %y = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+
+  ; CHECK: ret i1 [[R16]]
+  ret i1 %x
+}
+
+; CHECK: @bar(i32* [[B0:%[^ ]*]])
+define i1 @bar(i32* %p) {
+  ; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32
+  ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32), i32 4)
+  ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 2
+  ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 30
+  ; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]]
+  ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2
+  ; CHECK: br i1 [[S6]]
+
+  ; CHECK: [[S8:%[^ ]*]] = lshr i32 [[S2]], 5
+  ; CHECK: [[S9:%[^ ]*]] = getelementptr [1 x i8]* @bitset2.bits, i32 0, i32 [[S8]]
+  ; CHECK: [[S10:%[^ ]*]] = load i8* [[S9]]
+  ; CHECK: [[S11:%[^ ]*]] = trunc i32 [[S5]] to i8
+  ; CHECK: [[S12:%[^ ]*]] = and i8 [[S11]], 7
+  ; CHECK: [[S13:%[^ ]*]] = lshr i8 [[S10]], [[S12]]
+  ; CHECK: [[S14:%[^ ]*]] = trunc i8 [[S13]] to i1
+
+  ; CHECK: [[S16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[S14]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
+  ; CHECK: ret i1 [[S16]]
+  ret i1 %x
+}
+
+; CHECK: @baz(i32* [[C0:%[^ ]*]])
+define i1 @baz(i32* %p) {
+  ; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32
+  ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 3
+  ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 29
+  ; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]
+  ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 2
+  ; CHECK: br i1 [[T6]]
+
+  ; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T2]], 6
+  ; CHECK: [[T9:%[^ ]*]] = getelementptr [1 x i8]* @bitset3.bits, i32 0, i32 [[T8]]
+  ; CHECK: [[T10:%[^ ]*]] = load i8* [[T9]]
+  ; CHECK: [[T11:%[^ ]*]] = trunc i32 [[T5]] to i8
+  ; CHECK: [[T12:%[^ ]*]] = and i8 [[T11]], 7
+  ; CHECK: [[T13:%[^ ]*]] = lshr i8 [[T10]], [[T12]]
+  ; CHECK: [[T14:%[^ ]*]] = trunc i8 [[T13]] to i1
+
+  ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T14]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
+  ; CHECK: ret i1 [[T16]]
+  ret i1 %x
+}
+
+; CHECK-NOT: !llvm.bitsets