Index: docs/BitCodeFormat.rst =================================================================== --- docs/BitCodeFormat.rst +++ docs/BitCodeFormat.rst @@ -672,7 +672,7 @@ MODULE_CODE_GLOBALVAR Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, dllstorageclass]`` +``[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat, bitset]`` The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a global variable. The operand fields are: Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -591,10 +591,14 @@ Variables and aliases can have a :ref:`Thread Local Storage Model `. +A global may be marked as a :ref:`bitset ` global using the ``bitset`` +flag. + Syntax:: [@ =] [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized] + [bitset] [] [, section "name"] [, comdat [($name)]] [, align ] @@ -1851,6 +1855,65 @@ uselistorder i32 (i32) @bar, { 1, 0 } uselistorder_bb @foo, %bb, { 5, 1, 3, 2, 0, 4 } +.. _bitsets: + +Bitsets +======= + +This is a mechanism that allows IR modules to co-operatively build pointer +sets corresponding to addresses within a given set of globals. One example +of a use case for this is to allow a C++ program to efficiently verify (at +each call site) that a vtable pointer is in the set of valid vtable pointers +for the type of the class or its derived classes. + +To use the mechanism, a client marks a global variable containing an +array of pointers with the ``bitset`` flag. This will cause a link-time +optimization pass to generate a bitset from the memory addresses referenced +from the elements of the array. The pass will lay out the referenced globals +consecutively, so their definitions must be available at LTO time. An +intrinsic, :ref:`llvm.bitset.test `, generates code to test +whether a given pointer is a member of a bitset. + +Bitsets may have ``appending`` linkage, which allows bitsets to be formed +from multiple IR modules using the IR linker. + +:Example: + +:: + + target datalayout = "e-p:32:32" + + @a = internal global i32 0 + @b = internal global i32 0 + @c = internal global i32 0 + + @bitset1 = internal bitset constant [2 x i32*] [ i32* @a, i32* @b ] + @bitset2 = internal bitset constant [2 x i32*] [ i32* @b, i32* @c ] + + declare i1 @llvm.bitset.test(i8* %ptr, i8* %bitset) nounwind readnone + + define i1 @foo(i32* %p) { + %pi8 = bitcast i32* %p to i8* + %x = call i1 @llvm.bitset.test(i8* %pi8, i8* bitcast ([2 x i32*]* @bitset1 to i8*)) + ret i1 %x + } + + define i1 @bar(i32* %p) { + %pi8 = bitcast i32* %p to i8* + %x = call i1 @llvm.bitset.test(i8* %pi8, i8* bitcast ([2 x i32*]* @bitset2 to i8*)) + ret i1 %x + } + + define void @main() { + %a1 = call i1 @foo(i32* @a) ; returns 1 + %b1 = call i1 @foo(i32* @b) ; returns 1 + %c1 = call i1 @foo(i32* @c) ; returns 0 + %a2 = call i1 @bar(i32* @a) ; returns 0 + %b2 = call i1 @bar(i32* @b) ; returns 1 + %c2 = call i1 @bar(i32* @c) ; returns 1 + ret void + } + .. _typesystem: Type System @@ -9888,6 +9951,31 @@ that the optimizer can otherwise deduce or facts that are of little use to the optimizer. +.. _bitset.test: + +'``llvm.bitset.test``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i1 @llvm.bitset.test(i8* %ptr, i8* %bitset) nounwind readnone + + +Arguments: +"""""""""" + +The first argument is a pointer to be tested. The second argument must be a +(potentially bitcasted) reference to a specific :ref:`bitset ` global. + +Overview: +""""""""" + +The ``llvm.bitset.test`` intrinsic tests whether the given pointer is a +member of the given bitset global. + '``llvm.donothing``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: include/llvm/IR/GlobalVariable.h =================================================================== --- include/llvm/IR/GlobalVariable.h +++ include/llvm/IR/GlobalVariable.h @@ -45,6 +45,7 @@ // can change from its initial // value before global // initializers are run? + bool IsBitSet : 1; // Is this a bitset global? public: // allocate space for exactly one operand @@ -151,6 +152,13 @@ isExternallyInitializedConstant = Val; } + bool isBitSet() const { + return IsBitSet; + } + void setIsBitSet(bool Val) { + IsBitSet = Val; + } + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalVariable) from the GlobalVariable Src to this one. void copyAttributesFrom(const GlobalValue *Src) override; Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -584,6 +584,11 @@ [LLVMPointerTo<0>, llvm_i32_ty, LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>], [IntrReadArgMem]>; + +// Intrinsics to support bit sets. +def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -178,6 +178,7 @@ void initializeLoopUnswitchPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); void initializeLowerAtomicPass(PassRegistry&); +void initializeLowerBitSetsPass(PassRegistry&); void initializeLowerExpectIntrinsicPass(PassRegistry&); void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokePass(PassRegistry&); Index: include/llvm/Transforms/IPO.h =================================================================== --- include/llvm/Transforms/IPO.h +++ include/llvm/Transforms/IPO.h @@ -199,6 +199,10 @@ /// manager. ModulePass *createBarrierNoopPass(); +/// \brief This pass lowers bitset globals and the llvm.bitset.test intrinsic to +/// bitsets. +ModulePass *createLowerBitSetsPass(); + } // End llvm namespace #endif Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -514,6 +514,7 @@ KEYWORD(protected); KEYWORD(unnamed_addr); KEYWORD(externally_initialized); + KEYWORD(bitset); KEYWORD(extern_weak); KEYWORD(external); KEYWORD(thread_local); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -732,8 +732,8 @@ "symbol with local linkage must have default visibility"); unsigned AddrSpace; - bool IsConstant, IsExternallyInitialized; - LocTy IsExternallyInitializedLoc; + bool IsConstant, IsExternallyInitialized, IsBitSet; + LocTy IsExternallyInitializedLoc, IsBitSetLoc; LocTy TyLoc; Type *Ty = nullptr; @@ -741,6 +741,7 @@ ParseOptionalToken(lltok::kw_externally_initialized, IsExternallyInitialized, &IsExternallyInitializedLoc) || + ParseOptionalToken(lltok::kw_bitset, IsBitSet, &IsBitSetLoc) || ParseGlobalType(IsConstant) || ParseType(Ty, TyLoc)) return true; @@ -802,6 +803,7 @@ GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); GV->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); GV->setExternallyInitialized(IsExternallyInitialized); + GV->setIsBitSet(IsBitSet); GV->setThreadLocalMode(TLM); GV->setUnnamedAddr(UnnamedAddr); Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -46,6 +46,7 @@ kw_default, kw_hidden, kw_protected, kw_unnamed_addr, kw_externally_initialized, + kw_bitset, kw_extern_weak, kw_external, kw_thread_local, kw_localdynamic, kw_initialexec, kw_localexec, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -2156,7 +2156,8 @@ } // GLOBALVAR: [pointer type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, - // unnamed_addr, dllstorageclass] + // unnamed_addr, externally_initialized, dllstorageclass, + // comdat, bitset] case bitc::MODULE_CODE_GLOBALVAR: { if (Record.size() < 6) return Error("Invalid record"); @@ -2224,6 +2225,10 @@ } else if (hasImplicitComdat(RawLinkage)) { NewGV->setComdat(reinterpret_cast(1)); } + + if (Record.size() > 12) + NewGV->setIsBitSet(Record[12]); + break; } // FUNCTION: [type, callingconv, isproto, linkage, paramattr, Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -653,7 +653,8 @@ // GLOBALVAR: [type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, - // unnamed_addr, externally_initialized, dllstorageclass] + // unnamed_addr, externally_initialized, dllstorageclass, + // comdat, bitset] Vals.push_back(VE.getTypeID(GV.getType())); Vals.push_back(GV.isConstant()); Vals.push_back(GV.isDeclaration() ? 0 : @@ -663,7 +664,7 @@ Vals.push_back(GV.hasSection() ? SectionMap[GV.getSection()] : 0); if (GV.isThreadLocal() || GV.getVisibility() != GlobalValue::DefaultVisibility || - GV.hasUnnamedAddr() || GV.isExternallyInitialized() || + GV.hasUnnamedAddr() || GV.isExternallyInitialized() || GV.isBitSet() || GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || GV.hasComdat()) { Vals.push_back(getEncodedVisibility(GV)); @@ -672,6 +673,7 @@ Vals.push_back(GV.isExternallyInitialized()); Vals.push_back(getEncodedDLLStorageClass(GV)); Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0); + Vals.push_back(GV.isBitSet()); } else { AbbrevToUse = SimpleGVarAbbrev; } Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -1779,6 +1779,7 @@ if (unsigned AddressSpace = GV->getType()->getAddressSpace()) Out << "addrspace(" << AddressSpace << ") "; if (GV->isExternallyInitialized()) Out << "externally_initialized "; + if (GV->isBitSet()) Out << "bitset "; Out << (GV->isConstant() ? "constant " : "global "); TypePrinter.print(GV->getType()->getElementType(), Out); Index: lib/IR/Globals.cpp =================================================================== --- lib/IR/Globals.cpp +++ lib/IR/Globals.cpp @@ -151,7 +151,8 @@ OperandTraits::op_begin(this), InitVal != nullptr, Link, Name), isConstantGlobal(constant), - isExternallyInitializedConstant(isExternallyInitialized) { + isExternallyInitializedConstant(isExternallyInitialized), + IsBitSet(false) { setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && @@ -169,7 +170,8 @@ OperandTraits::op_begin(this), InitVal != nullptr, Link, Name), isConstantGlobal(constant), - isExternallyInitializedConstant(isExternallyInitialized) { + isExternallyInitializedConstant(isExternallyInitialized), + IsBitSet(false) { setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && @@ -238,6 +240,7 @@ const GlobalVariable *SrcVar = cast(Src); setThreadLocalMode(SrcVar->getThreadLocalMode()); setExternallyInitialized(SrcVar->isExternallyInitialized()); + setIsBitSet(SrcVar->isBitSet()); } Index: lib/Object/IRObjectFile.cpp =================================================================== --- lib/Object/IRObjectFile.cpp +++ lib/Object/IRObjectFile.cpp @@ -227,7 +227,7 @@ if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) Res |= BasicSymbolRef::SF_Weak; - if (GV->getName().startswith("llvm.")) + if (GV->hasAppendingLinkage() || GV->getName().startswith("llvm.")) Res |= BasicSymbolRef::SF_FormatSpecific; else if (auto *Var = dyn_cast(GV)) { if (Var->getSection() == StringRef("llvm.metadata")) Index: lib/Transforms/IPO/CMakeLists.txt =================================================================== --- lib/Transforms/IPO/CMakeLists.txt +++ lib/Transforms/IPO/CMakeLists.txt @@ -14,6 +14,7 @@ Inliner.cpp Internalize.cpp LoopExtractor.cpp + LowerBitSets.cpp MergeFunctions.cpp PartialInlining.cpp PassManagerBuilder.cpp Index: lib/Transforms/IPO/IPO.cpp =================================================================== --- lib/Transforms/IPO/IPO.cpp +++ lib/Transforms/IPO/IPO.cpp @@ -36,6 +36,7 @@ initializeLoopExtractorPass(Registry); initializeBlockExtractorPassPass(Registry); initializeSingleLoopExtractorPass(Registry); + initializeLowerBitSetsPass(Registry); initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); initializePruneEHPass(Registry); Index: lib/Transforms/IPO/LowerBitSets.cpp =================================================================== --- /dev/null +++ lib/Transforms/IPO/LowerBitSets.cpp @@ -0,0 +1,354 @@ +//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers bitset globals and calls to the llvm.bitset.test intrinsic. +// See http://llvm.org/docs/LangRef.html#bitsets for more information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace { + +struct LowerBitSets : public ModulePass { + static char ID; + LowerBitSets() : ModulePass(ID) { + initializeLowerBitSetsPass(*PassRegistry::getPassRegistry()); + } + + bool buildBitSets(Module &M, const DataLayout *DL); + bool eraseBitSetGlobals(Module &M); + + bool runOnModule(Module &M) override; +}; + +} + +INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets", + "Lower bitset globals", false, false) +INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets", + "Lower bitset globals", false, false) +char LowerBitSets::ID = 0; + +ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; } + +bool LowerBitSets::buildBitSets(Module &M, const DataLayout *DL) { + Function *BitSetTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::bitset_test)); + if (!BitSetTestFunc) + return false; + + // Equivalence class set containing bitsets and the globals they reference. + // This is used to partition the set of bitsets in the module into disjoint + // sets. + EquivalenceClasses GlobalClasses; + + // Mapping from bitset globals to the call sites that test them. + DenseMap> BitSetTestCallSites; + + for (const Use &U : BitSetTestFunc->uses()) { + auto CI = dyn_cast(U.getUser()); + if (!CI) + continue; + + auto BitSet = + dyn_cast(CI->getArgOperand(1)->stripPointerCasts()); + if (!BitSet || !BitSet->isBitSet()) + report_fatal_error("Second argument of llvm.bitset.test must be bitset"); + + std::pair>::iterator, + bool> Ins = + BitSetTestCallSites.insert( + std::make_pair(BitSet, std::vector())); + Ins.first->second.push_back(CI); + if (!Ins.second) + continue; + + EquivalenceClasses::iterator GCI = + GlobalClasses.insert(BitSet); + + EquivalenceClasses::member_iterator CurSet = + GlobalClasses.findLeader(GCI); + + if (BitSet->isDeclaration()) + report_fatal_error("Bit set must have initializer"); + + Constant *Init = BitSet->getInitializer(); + if (isa(Init)) { + ArrayType *AT = dyn_cast(Init->getType()); + if (!AT) + report_fatal_error("Bit set initializer must be constant array"); + if (AT->getNumElements() != 0) + report_fatal_error("Bit set initializer element must refer to global"); + continue; + } + + auto CA = dyn_cast(Init); + if (!CA) + report_fatal_error("Bit set initializer must be constant array"); + + for (Value *Op : CA->operands()) { + auto OpGlobal = dyn_cast(Op->stripInBoundsOffsets()); + if (!OpGlobal) + report_fatal_error("Bit set initializer element must refer to global"); + + CurSet = GlobalClasses.unionSets( + CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal))); + } + } + + if (GlobalClasses.empty()) + return false; + + for (EquivalenceClasses::iterator I = GlobalClasses.begin(), + E = GlobalClasses.end(); + I != E; ++I) { + if (!I->isLeader()) continue; + + // Build the list of bitsets and referenced globals in this disjoint set. + std::vector BitSets, Globals; + for (EquivalenceClasses::member_iterator MI = + GlobalClasses.member_begin(I); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI)->isBitSet()) + BitSets.push_back(*MI); + else + Globals.push_back(*MI); + } + + // Order bitsets and globals by name for determinism. TODO: We may later + // want to use a more sophisticated ordering that lays out globals so as to + // minimize the sizes of the bitsets. + auto ByName = [](GlobalVariable *GV1, GlobalVariable *GV2) { + return GV1->getName() < GV2->getName(); + }; + std::sort(BitSets.begin(), BitSets.end(), ByName); + std::sort(Globals.begin(), Globals.end(), ByName); + + // Build a new global with the combined contents of the referenced globals. + std::vector GlobalInits; + for (GlobalVariable *G : Globals) + GlobalInits.push_back(G->getInitializer()); + Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); + auto NewGlobal = + new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, + GlobalValue::PrivateLinkage, NewInit); + + const StructLayout *NewGlobalLayout = + DL->getStructLayout(cast(NewInit->getType())); + + // Compute the offsets of the original globals within the new global. + DenseMap GlobalLayout; + for (unsigned I = 0; I != Globals.size(); ++I) { + GlobalLayout[Globals[I]] = NewGlobalLayout->getElementOffset(I); + } + + Type *Int1Ty = Type::getInt1Ty(M.getContext()); + Type *Int8Ty = Type::getInt8Ty(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *IntPtrTy = DL->getIntPtrType(M.getContext(), 0); + + // For each bitset in this disjoint set... + for (GlobalVariable *BS : BitSets) { + std::vector Offsets; + uint64_t Min = ~0UL, Max = 0; + + // Compute the byte offset of each element of this bitset. + for (Value *Op : BS->getInitializer()->operands()) { + // TODO: Handle other kinds of constants here. + uint64_t Offset = 0; + if (auto GEP = dyn_cast(Op)) { + APInt APOffset(DL->getPointerSizeInBits(0), 0); + bool Result = GEP->accumulateConstantOffset(*DL, APOffset); + (void) Result; + assert(Result); + Offset = APOffset.getZExtValue(); + } + + auto OpGlobal = cast(Op->stripInBoundsOffsets()); + Offset += GlobalLayout[OpGlobal]; + + if (Min > Offset) + Min = Offset; + if (Max < Offset) + Max = Offset; + + Offsets.push_back(Offset); + } + + if (Min > Max) + Min = 0; + + // Normalize each offset against the minimum observed offset, and compute + // the bitwise OR of each of the offsets. The number of trailing zeros + // in the mask gives us the log2 of the alignment of all offsets, which + // allows us to compress the bitset by only storing one bit per aligned + // address. + uint64_t Mask = 0; + for (uint64_t &Offset : Offsets) { + Offset -= Min; + Mask |= Offset; + } + + unsigned AlignPow2 = 0; + // FIXME: Can probably do something smarter if all offsets are 0. + if (Mask != 0) + AlignPow2 = countTrailingZeros(Mask, ZB_Undefined); + + // Build the compressed bitset while normalizing the offsets against the + // computed alignment. + uint64_t BitSize = ((Max - Min) >> AlignPow2) + 1; + uint64_t ByteSize = (BitSize + 7) / 8; + std::vector Bits(ByteSize); + for (uint64_t Offset : Offsets) { + Offset >>= AlignPow2; + Bits[Offset / 8] |= 1 << (Offset % 8); + } + + Constant *BitsConst = ConstantDataArray::get(M.getContext(), Bits); + auto BitSetGlobal = new GlobalVariable( + M, BitsConst->getType(), /*isConstant=*/true, + GlobalValue::PrivateLinkage, BitsConst, BS->getName() + ".bits"); + + Constant *GlobalAsInt = ConstantExpr::getPtrToInt(NewGlobal, IntPtrTy); + Constant *GlobalMinAsInt = + ConstantExpr::getAdd(GlobalAsInt, ConstantInt::get(IntPtrTy, Min)); + Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BitSize); + + // Generate code to test the bitset for each call to llvm.bitset.test for + // this bitset. + for (CallInst *CI : BitSetTestCallSites[BS]) { + Value *Ptr = CI->getArgOperand(0); + BasicBlock *InitialBB = CI->getParent(); + + IRBuilder<> B(CI); + + Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); + + Value *PtrOffset = B.CreateSub(PtrAsInt, GlobalMinAsInt); + + Value *BitOffset; + if (AlignPow2 == 0) { + BitOffset = PtrOffset; + } else { + // We need to check that the offset both falls within our range + // and is suitably aligned. We can check both properties at the + // same time by performing a right rotate by log2(alignment) + // followed by an integer comparison against the bitset size. The + // rotate will move the lower order bits that need to be zero into + // the higher order bits of the result, causing the comparison to + // fail if they are nonzero. The rotate also conveniently gives + // us a bit offset to use during the load from the bitset. + Value *OffsetSHR = + B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, AlignPow2)); + Value *OffsetSHL = B.CreateShl( + PtrOffset, + ConstantInt::get(IntPtrTy, + DL->getPointerSizeInBits(0) - AlignPow2)); + BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); + } + + Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst); + + TerminatorInst *Term = + SplitBlockAndInsertIfThen(OffsetInRange, CI, false); + IRBuilder<> ThenB(Term); + + // Now that we know that the offset is in range and aligned, load + // the appropriate bit from the bitset. TODO: We might want to use the + // bt instruction with the previously computed bit offset; LLVM does + // not currently expose this instruction. + Value *BitSetGlobalOffset = ThenB.CreateLShr( + PtrOffset, ConstantInt::get(IntPtrTy, AlignPow2 + 3)); + Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), BitSetGlobalOffset}; + Value *BitSetGlobalAddr = ThenB.CreateGEP(BitSetGlobal, Idxs); + Value *BitSetGlobalVal = ThenB.CreateLoad(BitSetGlobalAddr); + + Value *BitSetShift = ThenB.CreateTrunc(BitOffset, Int8Ty); + BitSetShift = ThenB.CreateAnd(BitSetShift, ConstantInt::get(Int8Ty, 7)); + + Value *Bit = ThenB.CreateLShr(BitSetGlobalVal, BitSetShift); + Bit = ThenB.CreateTrunc(Bit, Int1Ty); + + // The value we want is 0 if we came directly from the initial block + // (having failed the range or alignment checks), or the loaded bit if + // we came from the block in which we loaded it. + B.SetInsertPoint(CI); + PHINode *P = B.CreatePHI(Int1Ty, 2); + P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB); + P->addIncoming(Bit, ThenB.GetInsertBlock()); + + CI->replaceAllUsesWith(P); + CI->eraseFromParent(); + } + } + + // Build aliases pointing to offsets into the combined global for each + // global from which we built the combined global, and replace references + // to the original globals with references to the aliases. + for (unsigned I = 0; I != Globals.size(); ++I) { + Constant *NewGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, I)}; + Constant *NewGlobalElemPtr = + ConstantExpr::getGetElementPtr(NewGlobal, NewGlobalIdxs); + GlobalAlias *GAlias = GlobalAlias::create( + Globals[I]->getType()->getElementType(), + Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(), + "", NewGlobalElemPtr, &M); + GAlias->takeName(Globals[I]); + Globals[I]->replaceAllUsesWith(GAlias); + Globals[I]->eraseFromParent(); + } + } + + return true; +} + +bool LowerBitSets::eraseBitSetGlobals(Module &M) { + bool Changed = false; + for (auto I = M.global_begin(), E = M.global_end(); I != E; ) { + GlobalVariable *G = &*I++; + + if (!G->isBitSet()) + continue; + + // These shouldn't have any uses other than calls to llvm.bitset.test, which + // should have already been eliminated by now. We do not define the behavior + // of any other uses, so RAUW with undef. + if (!G->use_empty()) + G->replaceAllUsesWith(UndefValue::get(G->getType())); + G->eraseFromParent(); + Changed = true; + } + + return Changed; +} + +bool LowerBitSets::runOnModule(Module &M) { + const DataLayout *DL = M.getDataLayout(); + if (!DL) + return false; + + bool Changed = buildBitSets(M, DL); + Changed |= eraseBitSetGlobals(M); + return Changed; +} Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -476,6 +476,9 @@ // currently it damages debug info. if (MergeFunctions) PM.add(createMergeFunctionsPass()); + + // Lower bitset constants to bitsets. + PM.add(createLowerBitSetsPass()); } void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, Index: test/Feature/bitset.ll =================================================================== --- /dev/null +++ test/Feature/bitset.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llvm-dis > %t1.ll +; RUN: FileCheck %s < %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + +; CHECK: @g = internal constant +@g = internal constant i8 1 + +; CHECK: @b = internal bitset constant +@b = internal bitset constant [1 x i8*] [ i8* @g ] Index: test/Transforms/LowerBitSets/simple.ll =================================================================== --- /dev/null +++ test/Transforms/LowerBitSets/simple.ll @@ -0,0 +1,107 @@ +; RUN: opt -S -lowerbitsets < %s | FileCheck %s + +target datalayout = "e-p:32:32" + +; CHECK: [[G:@[^ ]*]] = private constant { i32, i32, i32, [2 x i32] } { i32 1, i32 2, i32 3, [2 x i32] [i32 4, i32 5] } +@a = internal constant i32 1 +@b = internal constant i32 2 +@c = internal constant i32 3 +@d = internal constant [2 x i32] [i32 4, i32 5] + +; Offset 0, 4 byte alignment +; CHECK: @bitset1.bits = private constant [1 x i8] c"\13" +@bitset1 = internal bitset constant [3 x i32*] [ i32* @a, i32* @b, i32* getelementptr ([2 x i32]* @d, i32 0, i32 1) ] + +; Offset 4, 4 byte alignment +; CHECK: @bitset2.bits = private constant [1 x i8] c"\03" +@bitset2 = internal bitset constant [2 x i32*] [ i32* @b, i32* @c ] + +; Offset 0, 8 byte alignment +; CHECK: @bitset3.bits = private constant [1 x i8] c"\03" +@bitset3 = internal bitset constant [2 x i32*] [ i32* @a, i32* @c ] + +declare i1 @llvm.bitset.test(i8* %ptr, i8* %bitset) nounwind readnone + +; CHECK: @foo(i32* [[A0:%[^ ]*]]) +define i1 @foo(i32* %p) { + ; CHECK-NOT: llvm.bitset.test + + ; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8* + %pi8 = bitcast i32* %p to i8* + ; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32 + ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32) + ; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2 + ; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30 + ; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]] + ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 5 + ; CHECK: br i1 [[R6]] + + ; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R2]], 5 + ; CHECK: [[R9:%[^ ]*]] = getelementptr [1 x i8]* @bitset1.bits, i32 0, i32 [[R8]] + ; CHECK: [[R10:%[^ ]*]] = load i8* [[R9]] + ; CHECK: [[R11:%[^ ]*]] = trunc i32 [[R5]] to i8 + ; CHECK: [[R12:%[^ ]*]] = and i8 [[R11]], 7 + ; CHECK: [[R13:%[^ ]*]] = lshr i8 [[R10]], [[R12]] + ; CHECK: [[R14:%[^ ]*]] = trunc i8 [[R13]] to i1 + + ; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R14]], {{%[^ ]*}} ] + %x = call i1 @llvm.bitset.test(i8* %pi8, i8* bitcast ([3 x i32*]* @bitset1 to i8*)) + + ; CHECK-NOT: llvm.bitset.test + %y = call i1 @llvm.bitset.test(i8* %pi8, i8* bitcast ([3 x i32*]* @bitset1 to i8*)) + + ; CHECK: ret i1 [[R16]] + ret i1 %x +} + +; CHECK: @bar(i32* [[B0:%[^ ]*]]) +define i1 @bar(i32* %p) { + ; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8* + %pi8 = bitcast i32* %p to i8* + ; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32 + ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32), i32 4) + ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 2 + ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 30 + ; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]] + ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2 + ; CHECK: br i1 [[S6]] + + ; CHECK: [[S8:%[^ ]*]] = lshr i32 [[S2]], 5 + ; CHECK: [[S9:%[^ ]*]] = getelementptr [1 x i8]* @bitset2.bits, i32 0, i32 [[S8]] + ; CHECK: [[S10:%[^ ]*]] = load i8* [[S9]] + ; CHECK: [[S11:%[^ ]*]] = trunc i32 [[S5]] to i8 + ; CHECK: [[S12:%[^ ]*]] = and i8 [[S11]], 7 + ; CHECK: [[S13:%[^ ]*]] = lshr i8 [[S10]], [[S12]] + ; CHECK: [[S14:%[^ ]*]] = trunc i8 [[S13]] to i1 + + ; CHECK: [[S16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[S14]], {{%[^ ]*}} ] + %x = call i1 @llvm.bitset.test(i8* %pi8, i8* bitcast ([2 x i32*]* @bitset2 to i8*)) + ; CHECK: ret i1 [[S16]] + ret i1 %x +} + +; CHECK: @baz(i32* [[C0:%[^ ]*]]) +define i1 @baz(i32* %p) { + ; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8* + %pi8 = bitcast i32* %p to i8* + ; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32 + ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32) + ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 3 + ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 29 + ; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]] + ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 2 + ; CHECK: br i1 [[T6]] + + ; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T2]], 6 + ; CHECK: [[T9:%[^ ]*]] = getelementptr [1 x i8]* @bitset3.bits, i32 0, i32 [[T8]] + ; CHECK: [[T10:%[^ ]*]] = load i8* [[T9]] + ; CHECK: [[T11:%[^ ]*]] = trunc i32 [[T5]] to i8 + ; CHECK: [[T12:%[^ ]*]] = and i8 [[T11]], 7 + ; CHECK: [[T13:%[^ ]*]] = lshr i8 [[T10]], [[T12]] + ; CHECK: [[T14:%[^ ]*]] = trunc i8 [[T13]] to i1 + + ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T14]], {{%[^ ]*}} ] + %x = call i1 @llvm.bitset.test(i8* %pi8, i8* bitcast ([2 x i32*]* @bitset3 to i8*)) + ; CHECK: ret i1 [[T16]] + ret i1 %x +}