Index: docs/BitSets.rst =================================================================== --- /dev/null +++ docs/BitSets.rst @@ -0,0 +1,66 @@ +======= +Bitsets +======= + +This is a mechanism that allows IR modules to co-operatively build pointer +sets corresponding to addresses within a given set of globals. One example +of a use case for this is to allow a C++ program to efficiently verify (at +each call site) that a vtable pointer is in the set of valid vtable pointers +for the type of the class or its derived classes. + +To use the mechanism, a client creates a global metadata node named +``llvm.bitsets``. Each element is a metadata node with three elements: +the first is a metadata string containing an identifier for the bitset, +the second is a global variable and the third is a byte offset into the +global variable. + +This will cause a link-time optimization pass to generate bitsets from the +memory addresses referenced from the elements of the bitset metadata. The pass +will lay out the referenced globals consecutively, so their definitions must +be available at LTO time. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, +generates code to test whether a given pointer is a member of a bitset. 
+ +:Example: + +:: + + target datalayout = "e-p:32:32" + + @a = internal global i32 0 + @b = internal global i32 0 + @c = internal global i32 0 + @d = internal global [2 x i32] [i32 0, i32 0] + + !llvm.bitsets = !{!0, !1, !2, !3, !4} + + !0 = !{!"bitset1", i32* @a, i32 0} + !1 = !{!"bitset1", i32* @b, i32 0} + !2 = !{!"bitset2", i32* @b, i32 0} + !3 = !{!"bitset2", i32* @c, i32 0} + !4 = !{!"bitset2", [2 x i32]* @d, i32 4} + + declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone + + define i1 @foo(i32* %p) { + %pi8 = bitcast i32* %p to i8* + %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1") + ret i1 %x + } + + define i1 @bar(i32* %p) { + %pi8 = bitcast i32* %p to i8* + %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2") + ret i1 %x + } + + define void @main() { + %a1 = call i1 @foo(i32* @a) ; returns 1 + %b1 = call i1 @foo(i32* @b) ; returns 1 + %c1 = call i1 @foo(i32* @c) ; returns 0 + %a2 = call i1 @bar(i32* @a) ; returns 0 + %b2 = call i1 @bar(i32* @b) ; returns 1 + %c2 = call i1 @bar(i32* @c) ; returns 1 + %d02 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 0)) ; returns 0 + %d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1 + ret void + } Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -3305,6 +3305,12 @@ !1 = !{!1} ; an identifier for the inner loop !2 = !{!2} ; an identifier for the outer loop +'``llvm.bitsets``' +^^^^^^^^^^^^^^^^^^ + +The ``llvm.bitsets`` global metadata is used to implement +:doc:`bitsets <BitSets>`. + Module Flags Metadata ===================== @@ -9888,6 +9894,31 @@ that the optimizer can otherwise deduce or facts that are of little use to the optimizer. +.. 
_bitset.test: + +'``llvm.bitset.test``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone + + +Arguments: +"""""""""" + +The first argument is a pointer to be tested. The second argument is a +metadata string containing the name of a :doc:`bitset <BitSets>`. + +Overview: +""""""""" + +The ``llvm.bitset.test`` intrinsic tests whether the given pointer is a +member of the given bitset. + '``llvm.donothing``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: docs/index.rst =================================================================== --- docs/index.rst +++ docs/index.rst @@ -244,6 +244,7 @@ CoverageMappingFormat Statepoints MergeFunctions + BitSets :doc:`WritingAnLLVMPass` Information on how to write LLVM transformations and analyses. Index: include/llvm/ADT/EquivalenceClasses.h =================================================================== --- include/llvm/ADT/EquivalenceClasses.h +++ include/llvm/ADT/EquivalenceClasses.h @@ -255,7 +255,7 @@ assert(Node != nullptr && "Dereferencing end()!"); return Node->getData(); } - reference operator->() const { return operator*(); } + pointer operator->() const { return &operator*(); } member_iterator &operator++() { assert(Node != nullptr && "++'d off the end of the list!"); Index: include/llvm/ADT/PointerUnion.h =================================================================== --- include/llvm/ADT/PointerUnion.h +++ include/llvm/ADT/PointerUnion.h @@ -195,6 +195,12 @@ return lhs.getOpaqueValue() != rhs.getOpaqueValue(); } + template + static bool operator<(PointerUnion lhs, + PointerUnion rhs) { + return lhs.getOpaqueValue() < rhs.getOpaqueValue(); + } + // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has // # low bits available = min(PT1bits,PT2bits)-1. 
template Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -584,6 +584,11 @@ [LLVMPointerTo<0>, llvm_i32_ty, LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>], [IntrReadArgMem]>; + +// Intrinsics to support bit sets. +def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -177,6 +177,7 @@ void initializeLoopUnswitchPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); void initializeLowerAtomicPass(PassRegistry&); +void initializeLowerBitSetsPass(PassRegistry&); void initializeLowerExpectIntrinsicPass(PassRegistry&); void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokePass(PassRegistry&); Index: include/llvm/Transforms/IPO.h =================================================================== --- include/llvm/Transforms/IPO.h +++ include/llvm/Transforms/IPO.h @@ -199,6 +199,10 @@ /// manager. ModulePass *createBarrierNoopPass(); +/// \brief This pass lowers bitset metadata and the llvm.bitset.test intrinsic +/// to bitsets. 
+ModulePass *createLowerBitSetsPass();
+
 } // End llvm namespace
 
 #endif
Index: include/llvm/Transforms/IPO/LowerBitSets.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/IPO/LowerBitSets.h
@@ -0,0 +1,85 @@
+//===- LowerBitSets.h - Bitset lowering pass --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines parts of the bitset lowering pass implementation that may
+// be usefully unit tested.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_LOWERBITSETS_H
+#define LLVM_TRANSFORMS_IPO_LOWERBITSETS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include <stdint.h>
+#include <limits>
+#include <vector>
+
+namespace llvm {
+
+class DataLayout;
+class GlobalVariable;
+class Value;
+
+/// Compressed description of a set of byte offsets into the combined global:
+/// one bit per (1 << AlignLog2)-byte step, starting at ByteOffset.
+struct BitSetInfo {
+  // The actual bitset.
+  std::vector<uint8_t> Bits;
+
+  // The byte offset into the combined global represented by the bitset.
+  uint64_t ByteOffset;
+
+  // The size of the bitset in bits.
+  uint64_t BitSize;
+
+  // Log2 alignment of the bit set relative to the combined global.
+  // For example, a log2 alignment of 3 means that bits in the bitset
+  // represent addresses 8 bytes apart.
+  unsigned AlignLog2;
+
+  // True if the bitset has exactly one member, at ByteOffset.
+  bool isSingleOffset() const {
+    return Bits.size() == 1 && Bits[0] == 1;
+  }
+
+  // Test whether the given byte offset into the combined global is a member.
+  bool containsGlobalOffset(uint64_t Offset) const;
+
+  // Test whether V plus COffset bytes is statically known to be a member.
+  // GlobalLayout maps each global to its byte offset in the combined global.
+  bool containsValue(const DataLayout *DL,
+                     const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout,
+                     Value *V, uint64_t COffset = 0) const;
+};
+
+/// Accumulates byte offsets, tracking their minimum and maximum, and builds
+/// the corresponding BitSetInfo.
+struct BitSetBuilder {
+  SmallVector<uint64_t, 16> Offsets;
+  uint64_t Min, Max;
+
+  BitSetBuilder() : Min(std::numeric_limits<uint64_t>::max()), Max(0) {}
+
+  void addOffset(uint64_t Offset) {
+    if (Min > Offset)
+      Min = Offset;
+    if (Max < Offset)
+      Max = Offset;
+
+    Offsets.push_back(Offset);
+  }
+
+  BitSetInfo build();
+};
+
+} // namespace llvm
+
+#endif
Index: lib/Transforms/IPO/CMakeLists.txt
===================================================================
--- lib/Transforms/IPO/CMakeLists.txt
+++ lib/Transforms/IPO/CMakeLists.txt
@@ -14,6 +14,7 @@
   Inliner.cpp
   Internalize.cpp
   LoopExtractor.cpp
+  LowerBitSets.cpp
   MergeFunctions.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
Index: lib/Transforms/IPO/IPO.cpp
===================================================================
--- lib/Transforms/IPO/IPO.cpp
+++ lib/Transforms/IPO/IPO.cpp
@@ -36,6 +36,7 @@
   initializeLoopExtractorPass(Registry);
   initializeBlockExtractorPassPass(Registry);
   initializeSingleLoopExtractorPass(Registry);
+  initializeLowerBitSetsPass(Registry);
   initializeMergeFunctionsPass(Registry);
   initializePartialInlinerPass(Registry);
   initializePruneEHPass(Registry);
Index: lib/Transforms/IPO/LowerBitSets.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,556 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/BitSets.html for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerbitsets"
+
+STATISTIC(NumBitSetsCreated, "Number of bitsets created");
+STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
+STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+
+/// Return true if the byte at \p Offset within the combined global is a
+/// member of this bitset, i.e. it is in range, a multiple of the bitset's
+/// alignment away from ByteOffset, and its bit is set in Bits.
+bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
+  if (Offset < ByteOffset)
+    return false;
+
+  // Shift a 64-bit one: AlignLog2 may be 31 or more, which would overflow
+  // (and be undefined for) a plain int shift.
+  if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
+    return false;
+
+  uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
+  if (BitOffset >= BitSize)
+    return false;
+
+  return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1;
+}
+
+/// Return true if the value \p V, displaced by \p COffset bytes, is known
+/// to be a member of this bitset. Looks through constant GEPs, bitcasts and
+/// selects (both arms must be members); any other value, or a global that is
+/// absent from \p GlobalLayout, conservatively yields false.
+bool BitSetInfo::containsValue(
+    const DataLayout *DL,
+    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+    uint64_t COffset) const {
+  if (auto GV = dyn_cast<GlobalVariable>(V)) {
+    auto I = GlobalLayout.find(GV);
+    if (I == GlobalLayout.end())
+      return false;
+    return containsGlobalOffset(I->second + COffset);
+  }
+
+  if (auto GEP = dyn_cast<GEPOperator>(V)) {
+    APInt APOffset(DL->getPointerSizeInBits(0), 0);
+    bool Result = GEP->accumulateConstantOffset(*DL, APOffset);
+    if (!Result)
+      return false;
+    COffset += APOffset.getZExtValue();
+    return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
+                         COffset);
+  }
+
+  if (auto Op = dyn_cast<Operator>(V)) {
+    if (Op->getOpcode() == Instruction::BitCast)
+      return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
+
+    if (Op->getOpcode() == Instruction::Select)
+      return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
+             containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
+  }
+
+  return false;
+}
+
+/// Build the compressed bitset for the offsets added so far. Offsets are
+/// rebased against the smallest one and divided by their common power-of-two
+/// alignment, so that each bit stands for one aligned address. The builder
+/// itself is left unmodified, so build() may safely be called more than once.
+BitSetInfo BitSetBuilder::build() {
+  // With no offsets Min is still its initial maximum value; use a base of 0.
+  const uint64_t Base = Min > Max ? 0 : Min;
+
+  // Normalize each offset against the minimum observed offset, and compute
+  // the bitwise OR of each of the offsets. The number of trailing zeros
+  // in the mask gives us the log2 of the alignment of all offsets, which
+  // allows us to compress the bitset by only storing one bit per aligned
+  // address.
+  uint64_t Mask = 0;
+  for (uint64_t Offset : Offsets)
+    Mask |= Offset - Base;
+
+  BitSetInfo BSI;
+  BSI.ByteOffset = Base;
+
+  BSI.AlignLog2 = 0;
+  // FIXME: Can probably do something smarter if all offsets are 0.
+  if (Mask != 0)
+    BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+  // Build the compressed bitset while normalizing the offsets against the
+  // computed alignment.
+  BSI.BitSize = ((Max - Base) >> BSI.AlignLog2) + 1;
+  uint64_t ByteSize = (BSI.BitSize + 7) / 8;
+  BSI.Bits.resize(ByteSize);
+  for (uint64_t Offset : Offsets) {
+    Offset = (Offset - Base) >> BSI.AlignLog2;
+    BSI.Bits[Offset / 8] |= 1 << (Offset % 8);
+  }
+
+  return BSI;
+}
+
+namespace {
+
+/// Module pass that lays out the globals referenced by llvm.bitsets metadata
+/// and replaces calls to llvm.bitset.test with explicit bit tests.
+struct LowerBitSets : public ModulePass {
+  static char ID;
+  LowerBitSets() : ModulePass(ID) {
+    initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+  }
+
+  const DataLayout *DL;
+  IntegerType *Int1Ty;
+  IntegerType *Int8Ty;
+  IntegerType *Int32Ty;
+  Type *Int32PtrTy;
+  IntegerType *Int64Ty;
+  Type *IntPtrTy;
+
+  // The llvm.bitsets named metadata.
+  NamedMDNode *BitSetNM;
+
+  // Mapping from bitset mdstrings to the call sites that test them.
+  DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+  BitSetInfo
+  buildBitSet(MDString *BitSet,
+              const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+  void
+  lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,
+                  GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal,
+                  const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+  void buildBitSetsFromGlobals(Module &M,
+                               const std::vector<MDString *> &BitSets,
+                               const std::vector<GlobalVariable *> &Globals);
+  bool buildBitSets(Module &M);
+  bool eraseBitSetMetadata(Module &M);
+
+  bool doInitialization(Module &M) override;
+  bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+/// Cache the data layout, commonly used types and the llvm.bitsets metadata
+/// node, and discard call sites recorded for any previously processed module.
+bool LowerBitSets::doInitialization(Module &M) {
+  DL = M.getDataLayout();
+  if (!DL)
+    report_fatal_error("Data layout required");
+
+  Int1Ty = Type::getInt1Ty(M.getContext());
+  Int8Ty = Type::getInt8Ty(M.getContext());
+  Int32Ty = Type::getInt32Ty(M.getContext());
+  Int32PtrTy = PointerType::getUnqual(Int32Ty);
+  Int64Ty = Type::getInt64Ty(M.getContext());
+  IntPtrTy = DL->getIntPtrType(M.getContext(), 0);
+
+  BitSetNM = M.getNamedMetadata("llvm.bitsets");
+
+  BitSetTestCallSites.clear();
+
+  return false;
+}
+
+/// Build the bitset named \p BitSet from the llvm.bitsets entries that carry
+/// that name, translating each (global, offset) pair into a byte offset within
+/// the combined global by way of \p GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+    MDString *BitSet,
+    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+  BitSetBuilder BSB;
+
+  // Compute the byte offset of each element of this bitset.
+  if (BitSetNM) {
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+      auto OpGlobal = cast<GlobalVariable>(
+          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+      uint64_t Offset =
+          cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+                                ->getValue())->getZExtValue();
+
+      Offset += GlobalLayout.find(OpGlobal)->second;
+
+      BSB.addOffset(Offset);
+    }
+  }
+
+  return BSB.build();
+}
+
+/// Emit code that tests bit (\p BitOffset mod bitwidth) of the integer \p Bits
+/// and yields the result as an i1. \p BitOffset is zero-extended or truncated
+/// to the type of \p Bits first.
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
+                                  Value *BitOffset) {
+  auto BitsType = cast<IntegerType>(Bits->getType());
+  unsigned BitWidth = BitsType->getBitWidth();
+
+  BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
+  Value *BitIndex =
+      B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
+  Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
+  Value *MaskedBits = B.CreateAnd(Bits, BitMask);
+  return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
+}
+
+/// Replace the llvm.bitset.test call \p CI with code that tests its pointer
+/// argument against \p BSI: a constant true if membership is statically known,
+/// a single address comparison for a one-element bitset, and otherwise a
+/// range/alignment check followed by a bit test.
+void LowerBitSets::lowerBitSetCall(
+    CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal,
+    GlobalVariable *CombinedGlobal,
+    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+  Value *Ptr = CI->getArgOperand(0);
+
+  if (BSI.containsValue(DL, GlobalLayout, Ptr)) {
+    CI->replaceAllUsesWith(
+        ConstantInt::getTrue(BitSetGlobal->getParent()->getContext()));
+    CI->eraseFromParent();
+    return;
+  }
+
+  Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
+  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+      GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+
+  BasicBlock *InitialBB = CI->getParent();
+
+  IRBuilder<> B(CI);
+
+  Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+  if (BSI.isSingleOffset()) {
+    Value *Eq = B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
+    CI->replaceAllUsesWith(Eq);
+    CI->eraseFromParent();
+    return;
+  }
+
+  Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+  Value *BitOffset;
+  if (BSI.AlignLog2 == 0) {
+    BitOffset = PtrOffset;
+  } else {
+    // We need to check that the offset both falls within our range and is
+    // suitably aligned. We can check both properties at the same time by
+    // performing a right rotate by log2(alignment) followed by an integer
+    // comparison against the bitset size. The rotate will move the lower
+    // order bits that need to be zero into the higher order bits of the
+    // result, causing the comparison to fail if they are nonzero. The rotate
+    // also conveniently gives us a bit offset to use during the load from
+    // the bitset.
+    Value *OffsetSHR =
+        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+    Value *OffsetSHL = B.CreateShl(
+        PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) -
+                                                  BSI.AlignLog2));
+    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+  }
+
+  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+  Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+  TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+  IRBuilder<> ThenB(Term);
+
+  // Now that we know that the offset is in range and aligned, load the
+  // appropriate bit from the bitset. This pattern matches to the bt instruction
+  // on x86. TODO: We might want to use the memory variant of the bt instruction
+  // with the previously computed bit offset at -Os. This instruction does
+  // exactly what we want but has been benchmarked as being slower than open
+  // coding the load+bt.
+  Value *Bit;
+  if (BSI.Bits.size() <= 8) {
+    // If the bit set is sufficiently small, we can avoid a load by bit testing
+    // a constant.
+    IntegerType *BitsTy;
+    if (BSI.Bits.size() <= 4)
+      BitsTy = Int32Ty;
+    else
+      BitsTy = Int64Ty;
+
+    uint64_t Bits = 0;
+    for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) {
+      Bits <<= 8;
+      Bits |= *I;
+    }
+    Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
+    Bit = createMaskedBitTest(ThenB, BitsConst, BitOffset);
+  } else {
+    // FIXME: The bitset global is a byte array whose size need not be a
+    // multiple of 4 (e.g. 9 bytes for a 67-bit set), so the 32-bit load of its
+    // last word can read past the end of the global. It also assumes a
+    // little-endian byte order. Load individual bytes or pad the global.
+    Value *BitSetGlobalOffset =
+        ThenB.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5));
+    Value *BitSetEntryAddr = ThenB.CreateGEP(
+        ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset);
+    Value *BitSetEntry = ThenB.CreateLoad(BitSetEntryAddr);
+
+    Bit = createMaskedBitTest(ThenB, BitSetEntry, BitOffset);
+  }
+
+  // The value we want is 0 if we came directly from the initial block
+  // (having failed the range or alignment checks), or the loaded bit if
+  // we came from the block in which we loaded it.
+  B.SetInsertPoint(CI);
+  PHINode *P = B.CreatePHI(Int1Ty, 2);
+  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+  P->addIncoming(Bit, ThenB.GetInsertBlock());
+
+  CI->replaceAllUsesWith(P);
+  CI->eraseFromParent();
+}
+
+/// Lay out \p Globals consecutively in one combined global, build and lower
+/// each of \p BitSets against that layout, and replace the original globals
+/// with aliases into the combined global.
+void LowerBitSets::buildBitSetsFromGlobals(
+    Module &M,
+    const std::vector<MDString *> &BitSets,
+    const std::vector<GlobalVariable *> &Globals) {
+  // Build a new global with the combined contents of the referenced globals.
+  // The combined global may only be marked constant if every member is;
+  // otherwise stores to a mutable member would write to constant memory.
+  std::vector<Constant *> GlobalInits;
+  bool AllConstant = true;
+  for (GlobalVariable *G : Globals) {
+    GlobalInits.push_back(G->getInitializer());
+    AllConstant &= G->isConstant();
+  }
+  Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
+  auto CombinedGlobal =
+      new GlobalVariable(M, NewInit->getType(), /*isConstant=*/AllConstant,
+                         GlobalValue::PrivateLinkage, NewInit);
+
+  const StructLayout *CombinedGlobalLayout =
+      DL->getStructLayout(cast<StructType>(NewInit->getType()));
+
+  // Compute the offsets of the original globals within the new global.
+  DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+  for (unsigned I = 0; I != Globals.size(); ++I) {
+    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I);
+  }
+
+  // For each bitset in this disjoint set...
+  for (MDString *BS : BitSets) {
+    // Build the bitset.
+    BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+
+    // Create a global in which to store it.
+    ++NumBitSetsCreated;
+    Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits);
+    auto BitSetGlobal = new GlobalVariable(
+        M, BitsConst->getType(), /*isConstant=*/true,
+        GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits");
+
+    // Lower each call to llvm.bitset.test for this bitset.
+    for (CallInst *CI : BitSetTestCallSites[BS]) {
+      ++NumBitSetCallsLowered;
+      lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+    }
+  }
+
+  // Build aliases pointing to offsets into the combined global for each
+  // global from which we built the combined global, and replace references
+  // to the original globals with references to the aliases.
+  for (unsigned I = 0; I != Globals.size(); ++I) {
+    Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+                                      ConstantInt::get(Int32Ty, I)};
+    Constant *CombinedGlobalElemPtr =
+        ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
+    GlobalAlias *GAlias = GlobalAlias::create(
+        Globals[I]->getType()->getElementType(),
+        Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
+        "", CombinedGlobalElemPtr, &M);
+    GAlias->takeName(Globals[I]);
+    Globals[I]->replaceAllUsesWith(GAlias);
+    Globals[I]->eraseFromParent();
+  }
+}
+
+/// Find every llvm.bitset.test call, validate the metadata of the bitsets they
+/// name, partition bitsets and the globals they reference into disjoint sets,
+/// and lower each set. Returns true if the module contained at least one such
+/// call. Malformed metadata is reported as a fatal error.
+bool LowerBitSets::buildBitSets(Module &M) {
+  Function *BitSetTestFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+  if (!BitSetTestFunc)
+    return false;
+
+  // Equivalence class set containing bitsets and the globals they reference.
+  // This is used to partition the set of bitsets in the module into disjoint
+  // sets.
+  typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+      GlobalClassesTy;
+  GlobalClassesTy GlobalClasses;
+
+  for (const Use &U : BitSetTestFunc->uses()) {
+    auto CI = cast<CallInst>(U.getUser());
+
+    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+      report_fatal_error(
+          "Second argument of llvm.bitset.test must be metadata string");
+    auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+    // Add the call site to the list of call sites for this bit set. We also use
+    // BitSetTestCallSites to keep track of whether we have seen this bit set
+    // before. If we have, we don't need to re-add the referenced globals to the
+    // equivalence class.
+    std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+              bool> Ins =
+        BitSetTestCallSites.insert(
+            std::make_pair(BitSet, std::vector<CallInst *>()));
+    Ins.first->second.push_back(CI);
+    if (!Ins.second)
+      continue;
+
+    // Add the bitset to the equivalence class.
+    GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+    GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+    if (!BitSetNM)
+      continue;
+
+    // Verify the bitset metadata and add the referenced globals to the bitset's
+    // equivalence class.
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getNumOperands() != 3)
+        report_fatal_error(
+            "All operands of llvm.bitsets metadata must have 3 elements");
+
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+
+      auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+      if (!OpConstMD)
+        report_fatal_error("Bit set element must be a constant");
+      auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+      if (!OpGlobal)
+        report_fatal_error("Bit set element must refer to global");
+
+      auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+      if (!OffsetConstMD)
+        report_fatal_error("Bit set element offset must be a constant");
+      auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+      if (!OffsetInt)
+        report_fatal_error(
+            "Bit set element offset must be an integer constant");
+
+      CurSet = GlobalClasses.unionSets(
+          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+    }
+  }
+
+  if (GlobalClasses.empty())
+    return false;
+
+  // For each disjoint set we found...
+  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+                                 E = GlobalClasses.end();
+       I != E; ++I) {
+    if (!I->isLeader()) continue;
+
+    ++NumBitSetDisjointSets;
+
+    // Build the list of bitsets and referenced globals in this disjoint set.
+    std::vector<MDString *> BitSets;
+    std::vector<GlobalVariable *> Globals;
+    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+         MI != GlobalClasses.member_end(); ++MI) {
+      if ((*MI).is<MDString *>())
+        BitSets.push_back(MI->get<MDString *>());
+      else
+        Globals.push_back(MI->get<GlobalVariable *>());
+    }
+
+    // Order bitsets and globals by name for determinism. TODO: We may later
+    // want to use a more sophisticated ordering that lays out globals so as to
+    // minimize the sizes of the bitsets.
+    std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+      return S1->getString() < S2->getString();
+    });
+    std::sort(Globals.begin(), Globals.end(),
+              [](GlobalVariable *GV1, GlobalVariable *GV2) {
+                return GV1->getName() < GV2->getName();
+              });
+
+    // Build the bitsets from this disjoint set.
+    buildBitSetsFromGlobals(M, BitSets, Globals);
+  }
+
+  return true;
+}
+
+/// Remove the llvm.bitsets named metadata node, if the module has one.
+/// Returns true if a node was removed.
+bool LowerBitSets::eraseBitSetMetadata(Module &M) {
+  if (!BitSetNM)
+    return false;
+
+  M.eraseNamedMetadata(BitSetNM);
+  return true;
+}
+
+/// Lower all bitsets in \p M, then drop the now-unneeded bitset metadata.
+bool LowerBitSets::runOnModule(Module &M) {
+  bool Changed = buildBitSets(M);
+  Changed |= eraseBitSetMetadata(M);
+  return Changed;
+}
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -466,6 +466,9 @@
 
   PM.add(createJumpThreadingPass());
 
+  // Lower bitset metadata to bitsets.
+  PM.add(createLowerBitSetsPass());
+
   // Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass()); Index: test/Transforms/LowerBitSets/constant.ll =================================================================== --- /dev/null +++ test/Transforms/LowerBitSets/constant.ll @@ -0,0 +1,34 @@ +; RUN: opt -S -lowerbitsets < %s | FileCheck %s + +target datalayout = "e-p:32:32" + +@a = constant i32 1 +@b = constant [2 x i32] [i32 2, i32 3] + +!0 = !{!"bitset1", i32* @a, i32 0} +!1 = !{!"bitset1", [2 x i32]* @b, i32 4} + +!llvm.bitsets = !{ !0, !1 } + +declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone + +; CHECK: @foo( +define i1 @foo() { + ; CHECK: ret i1 true + %x = call i1 @llvm.bitset.test(i8* bitcast (i32* @a to i8*), metadata !"bitset1") + ret i1 %x +} + +; CHECK: @bar( +define i1 @bar() { + ; CHECK: ret i1 true + %x = call i1 @llvm.bitset.test(i8* bitcast (i32* getelementptr ([2 x i32]* @b, i32 0, i32 1) to i8*), metadata !"bitset1") + ret i1 %x +} + +; CHECK: @baz( +define i1 @baz() { + ; CHECK-NOT: ret i1 true + %x = call i1 @llvm.bitset.test(i8* bitcast (i32* getelementptr ([2 x i32]* @b, i32 0, i32 0) to i8*), metadata !"bitset1") + ret i1 %x +} Index: test/Transforms/LowerBitSets/simple.ll =================================================================== --- /dev/null +++ test/Transforms/LowerBitSets/simple.ll @@ -0,0 +1,127 @@ +; RUN: opt -S -lowerbitsets < %s | FileCheck %s +; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s + +target datalayout = "e-p:32:32" + +; CHECK: [[G:@[^ ]*]] = private constant { i32, [63 x i32], i32, [2 x i32] } { i32 1, [63 x i32] zeroinitializer, i32 3, [2 x i32] [i32 4, i32 5] } +@a = constant i32 1 +@b = constant [63 x i32] zeroinitializer +@c = constant i32 3 +@d = constant [2 x i32] [i32 4, i32 5] + +; Offset 0, 4 byte alignment +; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\04" +!0 = !{!"bitset1", i32* @a, i32 0} +; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0} +!1 = !{!"bitset1", [63 x i32]* @b, i32 0} 
+; CHECK-NODISCARD-DAG: !{!"bitset1", [63 x i32]* @b, i32 0} +!2 = !{!"bitset1", [2 x i32]* @d, i32 4} +; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4} + +; Offset 4, 4 byte alignment +; CHECK: @bitset2.bits = private constant [8 x i8] c"\01\00\00\00\00\00\00\80" +!3 = !{!"bitset2", [63 x i32]* @b, i32 0} +; CHECK-NODISCARD-DAG: !{!"bitset2", [63 x i32]* @b, i32 0} +!4 = !{!"bitset2", i32* @c, i32 0} +; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0} + +; Offset 0, 256 byte alignment +; CHECK: @bitset3.bits = private constant [1 x i8] c"\03" +!5 = !{!"bitset3", i32* @a, i32 0} +; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0} +!6 = !{!"bitset3", i32* @c, i32 0} +; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @c, i32 0} + +; Entries whose second operand is null (the result of a global being DCE'd) +; should be ignored. +!7 = !{!"bitset2", null, i32 0} + +!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 } + +; CHECK: @a = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 0) +; CHECK: @b = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 1) +; CHECK: @c = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 2) +; CHECK: @d = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 3) + +declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone + +; CHECK: @foo(i32* [[A0:%[^ ]*]]) +define i1 @foo(i32* %p) { + ; CHECK-NOT: llvm.bitset.test + + ; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8* + %pi8 = bitcast i32* %p to i8* + ; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32 + ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32) + ; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2 + ; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30 + ; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]] + ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 67 + ; CHECK: br i1 [[R6]] + + ; CHECK: 
[[R8:%[^ ]*]] = lshr i32 [[R5]], 5 + ; CHECK: [[R9:%[^ ]*]] = getelementptr i32* bitcast ([9 x i8]* @bitset1.bits to i32*), i32 [[R8]] + ; CHECK: [[R10:%[^ ]*]] = load i32* [[R9]] + ; CHECK: [[R11:%[^ ]*]] = and i32 [[R5]], 31 + ; CHECK: [[R12:%[^ ]*]] = shl i32 1, [[R11]] + ; CHECK: [[R13:%[^ ]*]] = and i32 [[R10]], [[R12]] + ; CHECK: [[R14:%[^ ]*]] = icmp ne i32 [[R13]], 0 + + ; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R14]], {{%[^ ]*}} ] + %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1") + + ; CHECK-NOT: llvm.bitset.test + %y = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1") + + ; CHECK: ret i1 [[R16]] + ret i1 %x +} + +; CHECK: @bar(i32* [[B0:%[^ ]*]]) +define i1 @bar(i32* %p) { + ; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8* + %pi8 = bitcast i32* %p to i8* + ; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32 + ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32), i32 4) + ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 2 + ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 30 + ; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]] + ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 64 + ; CHECK: br i1 [[S6]] + + ; CHECK: [[S8:%[^ ]*]] = zext i32 [[S5]] to i64 + ; CHECK: [[S9:%[^ ]*]] = and i64 [[S8]], 63 + ; CHECK: [[S10:%[^ ]*]] = shl i64 1, [[S9]] + ; CHECK: [[S11:%[^ ]*]] = and i64 -9223372036854775807, [[S10]] + ; CHECK: [[S12:%[^ ]*]] = icmp ne i64 [[S11]], 0 + + ; CHECK: [[S16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[S12]], {{%[^ ]*}} ] + %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2") + ; CHECK: ret i1 [[S16]] + ret i1 %x +} + +; CHECK: @baz(i32* [[C0:%[^ ]*]]) +define i1 @baz(i32* %p) { + ; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8* + %pi8 = bitcast i32* %p to i8* + ; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32 + ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32) + ; CHECK: [[T3:%[^ ]*]] = lshr 
i32 [[T2]], 8 + ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 24 + ; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]] + ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 2 + ; CHECK: br i1 [[T6]] + + ; CHECK: [[T8:%[^ ]*]] = and i32 [[T5]], 31 + ; CHECK: [[T9:%[^ ]*]] = shl i32 1, [[T8]] + ; CHECK: [[T10:%[^ ]*]] = and i32 3, [[T9]] + ; CHECK: [[T11:%[^ ]*]] = icmp ne i32 [[T10]], 0 + + ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ] + %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3") + ; CHECK: ret i1 [[T16]] + ret i1 %x +} + +; CHECK-NOT: !llvm.bitsets Index: test/Transforms/LowerBitSets/single-offset.ll =================================================================== --- /dev/null +++ test/Transforms/LowerBitSets/single-offset.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -lowerbitsets < %s | FileCheck %s + +target datalayout = "e-p:32:32" + +; CHECK: [[G:@[^ ]*]] = private constant { i32, i32 } +@a = constant i32 1 +@b = constant i32 2 + +!0 = !{!"bitset1", i32* @a, i32 0} +!1 = !{!"bitset1", i32* @b, i32 0} +!2 = !{!"bitset2", i32* @a, i32 0} +!3 = !{!"bitset3", i32* @b, i32 0} + +!llvm.bitsets = !{ !0, !1, !2, !3 } + +declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone + +; CHECK: @foo(i8* [[A0:%[^ ]*]]) +define i1 @foo(i8* %p) { + ; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i32 + ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, i32 }* [[G]] to i32) + %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset2") + ; CHECK: ret i1 [[R1]] + ret i1 %x +} + +; CHECK: @bar(i8* [[B0:%[^ ]*]]) +define i1 @bar(i8* %p) { + ; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i32 + ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, i32 }* [[G]] to i32), i32 4) + %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset3") + ; CHECK: ret i1 [[S1]] + ret i1 %x +} + +; CHECK: @x( +define i1 @x(i8* %p) { + %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1") + ret i1 %x +} Index: 
unittests/Transforms/CMakeLists.txt =================================================================== --- unittests/Transforms/CMakeLists.txt +++ unittests/Transforms/CMakeLists.txt @@ -1 +1,2 @@ +add_subdirectory(IPO) add_subdirectory(Utils) Index: unittests/Transforms/IPO/CMakeLists.txt =================================================================== --- /dev/null +++ unittests/Transforms/IPO/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + IPO + ) + +add_llvm_unittest(IPOTests + LowerBitSets.cpp + ) Index: unittests/Transforms/IPO/LowerBitSets.cpp =================================================================== --- /dev/null +++ unittests/Transforms/IPO/LowerBitSets.cpp @@ -0,0 +1,50 @@ +//===- LowerBitSets.cpp - Unit tests for bitset lowering ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/LowerBitSets.h" +#include "gtest/gtest.h" + +using namespace llvm; + +TEST(LowerBitSets, BitSetBuilder) { + struct { + std::vector<uint64_t> Offsets; + std::vector<uint8_t> Bits; + uint64_t ByteOffset; + uint64_t BitSize; + unsigned AlignLog2; + bool IsSingleOffset; + } BSBTests[] = { + {{}, {0}, 0, 1, 0, false}, + {{0}, {1}, 0, 1, 0, true}, + {{4}, {1}, 4, 1, 0, true}, + {{37}, {1}, 37, 1, 0, true}, + {{0, 1}, {3}, 0, 2, 0, false}, + {{0, 4}, {3}, 0, 2, 2, false}, + {{3, 7}, {3}, 3, 2, 2, false}, + {{0, 1, 7}, {131}, 0, 8, 0, false}, + {{0, 2, 14}, {131}, 0, 8, 1, false}, + {{0, 1, 8}, {3, 1}, 0, 9, 0, false}, + {{0, 2, 16}, {3, 1}, 0, 9, 1, false}, + }; + + for (auto &&T : BSBTests) { + BitSetBuilder BSB; + for (auto Offset : T.Offsets) + BSB.addOffset(Offset); + + BitSetInfo BSI = BSB.build(); + + EXPECT_EQ(T.Bits, BSI.Bits); + EXPECT_EQ(T.ByteOffset, BSI.ByteOffset); + EXPECT_EQ(T.BitSize, BSI.BitSize); +
EXPECT_EQ(T.AlignLog2, BSI.AlignLog2); + EXPECT_EQ(T.IsSingleOffset, BSI.isSingleOffset()); + } +} Index: unittests/Transforms/IPO/Makefile =================================================================== --- unittests/Transforms/IPO/Makefile +++ unittests/Transforms/IPO/Makefile @@ -1,4 +1,4 @@ -##===- unittests/Transforms/Makefile -----------------------*- Makefile -*-===## +##===- unittests/Transforms/IPO/Makefile -------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,11 +7,9 @@ # ##===----------------------------------------------------------------------===## -LEVEL = ../.. +LEVEL = ../../.. +TESTNAME = IPO +LINK_COMPONENTS := IPO -PARALLEL_DIRS = Utils - -include $(LEVEL)/Makefile.common - -clean:: - $(Verb) $(RM) -f *Tests +include $(LEVEL)/Makefile.config +include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest Index: unittests/Transforms/Makefile =================================================================== --- unittests/Transforms/Makefile +++ unittests/Transforms/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. -PARALLEL_DIRS = Utils +PARALLEL_DIRS = IPO Utils include $(LEVEL)/Makefile.common