Index: docs/BitSets.rst
===================================================================
--- /dev/null
+++ docs/BitSets.rst
@@ -0,0 +1,66 @@
+=======
+Bitsets
+=======
+
+This is a mechanism that allows IR modules to co-operatively build pointer
+sets corresponding to addresses within a given set of globals. One example
+of a use case for this is to allow a C++ program to efficiently verify (at
+each call site) that a vtable pointer is in the set of valid vtable pointers
+for the type of the class or its derived classes.
+
+To use the mechanism, a client creates a global metadata node named
+``llvm.bitsets``. Each element is a metadata node with three elements:
+the first is a metadata string containing an identifier for the bitset,
+the second is a global variable and the third is a byte offset into the
+global variable.
+
+This will cause a link-time optimization pass to generate bitsets from the
+memory addresses referenced from the elements of the bitset metadata. The pass
+will lay out the referenced globals consecutively, so their definitions must
+be available at LTO time. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`,
+generates code to test whether a given pointer is a member of a bitset.
+
+:Example:
+
+::
+
+    target datalayout = "e-p:32:32"
+
+    @a = internal global i32 0
+    @b = internal global i32 0
+    @c = internal global i32 0
+    @d = internal global [2 x i32] [i32 0, i32 0]
+
+    !llvm.bitsets = !{!0, !1, !2, !3, !4}
+
+    !0 = !{!"bitset1", i32* @a, i32 0}
+    !1 = !{!"bitset1", i32* @b, i32 0}
+    !2 = !{!"bitset2", i32* @b, i32 0}
+    !3 = !{!"bitset2", i32* @c, i32 0}
+    !4 = !{!"bitset2", i32* @d, i32 4}
+
+    declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+    define i1 @foo(i32* %p) {
+      %pi8 = bitcast i32* %p to i8*
+      %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+      ret i1 %x
+    }
+
+    define i1 @bar(i32* %p) {
+      %pi8 = bitcast i32* %p to i8*
+      %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
+      ret i1 %x
+    }
+
+    define void @main() {
+      %a1 = call i1 @foo(i32* @a) ; returns 1
+      %b1 = call i1 @foo(i32* @b) ; returns 1
+      %c1 = call i1 @foo(i32* @c) ; returns 0
+      %a2 = call i1 @bar(i32* @a) ; returns 0
+      %b2 = call i1 @bar(i32* @b) ; returns 1
+      %c2 = call i1 @bar(i32* @c) ; returns 1
+      %d02 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 0)) ; returns 0
+      %d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
+      ret void
+    }
Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -3305,6 +3305,12 @@
    !1 = !{!1} ; an identifier for the inner loop
    !2 = !{!2} ; an identifier for the outer loop
 
+'``llvm.bitsets``'
+^^^^^^^^^^^^^^^^^^
+
+The ``llvm.bitsets`` global metadata is used to implement
+:doc:`bitsets <BitSets>`.
+
 Module Flags Metadata
 =====================
@@ -9888,6 +9894,31 @@
 that the optimizer can otherwise deduce or facts that are of little use to the
 optimizer.
 
+..
_bitset.test:
+
+'``llvm.bitset.test``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to be tested. The second argument is a
+metadata string containing the name of a :doc:`bitset <BitSets>`.
+
+Overview:
+"""""""""
+
+The ``llvm.bitset.test`` intrinsic tests whether the given pointer is a
+member of the given bitset.
+
 '``llvm.donothing``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Index: docs/index.rst
===================================================================
--- docs/index.rst
+++ docs/index.rst
@@ -244,6 +244,7 @@
    CoverageMappingFormat
    Statepoints
    MergeFunctions
+   BitSets
 
 :doc:`WritingAnLLVMPass`
    Information on how to write LLVM transformations and analyses.
Index: include/llvm/ADT/EquivalenceClasses.h
===================================================================
--- include/llvm/ADT/EquivalenceClasses.h
+++ include/llvm/ADT/EquivalenceClasses.h
@@ -255,7 +255,7 @@
       assert(Node != nullptr && "Dereferencing end()!");
       return Node->getData();
     }
-    reference operator->() const { return operator*(); }
+    pointer operator->() const { return &operator*(); }
 
     member_iterator &operator++() {
       assert(Node != nullptr && "++'d off the end of the list!");
Index: include/llvm/ADT/PointerUnion.h
===================================================================
--- include/llvm/ADT/PointerUnion.h
+++ include/llvm/ADT/PointerUnion.h
@@ -195,6 +195,12 @@
     return lhs.getOpaqueValue() != rhs.getOpaqueValue();
   }
 
+  template <typename PT1, typename PT2>
+  static bool operator<(PointerUnion<PT1, PT2> lhs,
+                        PointerUnion<PT1, PT2> rhs) {
+    return lhs.getOpaqueValue() < rhs.getOpaqueValue();
+  }
+
   // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has
   // # low bits available = min(PT1bits,PT2bits)-1.
  template <typename PT1, typename PT2>
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -584,6 +584,11 @@
                        [LLVMPointerTo<0>, llvm_i32_ty,
                         LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
                        [IntrReadArgMem]>;
+
+// Intrinsics to support bit sets.
+def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
+                                [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -177,6 +177,7 @@
 void initializeLoopUnswitchPass(PassRegistry&);
 void initializeLoopIdiomRecognizePass(PassRegistry&);
 void initializeLowerAtomicPass(PassRegistry&);
+void initializeLowerBitSetsPass(PassRegistry&);
 void initializeLowerExpectIntrinsicPass(PassRegistry&);
 void initializeLowerIntrinsicsPass(PassRegistry&);
 void initializeLowerInvokePass(PassRegistry&);
Index: include/llvm/Transforms/IPO.h
===================================================================
--- include/llvm/Transforms/IPO.h
+++ include/llvm/Transforms/IPO.h
@@ -199,6 +199,10 @@
 /// manager.
 ModulePass *createBarrierNoopPass();
 
+/// \brief This pass lowers bitset metadata and the llvm.bitset.test intrinsic
+/// to bitsets.
+ModulePass *createLowerBitSetsPass();
+
 } // End llvm namespace
 
 #endif
Index: lib/Transforms/IPO/CMakeLists.txt
===================================================================
--- lib/Transforms/IPO/CMakeLists.txt
+++ lib/Transforms/IPO/CMakeLists.txt
@@ -14,6 +14,7 @@
   Inliner.cpp
   Internalize.cpp
   LoopExtractor.cpp
+  LowerBitSets.cpp
   MergeFunctions.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
Index: lib/Transforms/IPO/IPO.cpp
===================================================================
--- lib/Transforms/IPO/IPO.cpp
+++ lib/Transforms/IPO/IPO.cpp
@@ -36,6 +36,7 @@
   initializeLoopExtractorPass(Registry);
   initializeBlockExtractorPassPass(Registry);
   initializeSingleLoopExtractorPass(Registry);
+  initializeLowerBitSetsPass(Registry);
   initializeMergeFunctionsPass(Registry);
   initializePartialInlinerPass(Registry);
   initializePruneEHPass(Registry);
Index: lib/Transforms/IPO/LowerBitSets.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,424 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+
+struct LowerBitSets : public ModulePass {
+  static char ID;
+  LowerBitSets() : ModulePass(ID) {
+    initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+  }
+
+  const DataLayout *DL;
+  Type *Int1Ty;
+  Type *Int8Ty;
+  Type *Int32Ty;
+  Type *IntPtrTy;
+
+  // The llvm.bitsets named metadata.
+  NamedMDNode *BitSetNM;
+
+  // Mapping from bitset mdstrings to the call sites that test them.
+  DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+  struct BitSetInfo {
+    // A constant array representing the actual bitset.
+    Constant *BitsConst;
+
+    // The byte offset into the combined global represented by the bitset.
+    uint64_t ByteOffset;
+
+    // The size of the bitset in bits.
+    uint64_t BitSize;
+
+    // Log2 alignment of the bit set relative to the combined global.
+    // For example, a log2 alignment of 3 means that bits in the bitset
+    // represent addresses 8 bytes apart.
+    unsigned AlignLog2;
+  };
+
+  BitSetInfo
+  buildBitSet(Module &M, MDString *BitSet,
+              const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+  void lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,
+                       GlobalVariable *BitSetGlobal,
+                       GlobalVariable *CombinedGlobal);
+  void buildBitSetsFromGlobals(Module &M,
+                               const std::vector<MDString *> &BitSets,
+                               const std::vector<GlobalVariable *> &Globals);
+  bool buildBitSets(Module &M);
+  bool eraseBitSetMetadata(Module &M);
+
+  bool doInitialization(Module &M) override;
+  bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &M) {
+  DL = M.getDataLayout();
+
+  Int1Ty = Type::getInt1Ty(M.getContext());
+  Int8Ty = Type::getInt8Ty(M.getContext());
+  Int32Ty = Type::getInt32Ty(M.getContext());
+  if (DL)
+    IntPtrTy = DL->getIntPtrType(M.getContext(), 0);
+
+  BitSetNM = M.getNamedMetadata("llvm.bitsets");
+
+  BitSetTestCallSites.clear();
+
+  return false;
+}
+
+LowerBitSets::BitSetInfo LowerBitSets::buildBitSet(
+    Module &M,
+    MDString *BitSet,
+    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+  BitSetInfo BSI;
+
+  std::vector<uint64_t> Offsets;
+  uint64_t Min = std::numeric_limits<uint64_t>::max(), Max = 0;
+
+  // Compute the byte offset of each element of this bitset.
+  if (BitSetNM) {
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+      auto OpGlobal = cast<GlobalVariable>(
+          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+      uint64_t Offset =
+          cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+                                ->getValue())->getZExtValue();
+
+      Offset += GlobalLayout.find(OpGlobal)->second;
+
+      if (Min > Offset)
+        Min = Offset;
+      if (Max < Offset)
+        Max = Offset;
+
+      Offsets.push_back(Offset);
+    }
+  }
+
+  if (Min > Max)
+    Min = 0;
+
+  // Normalize each offset against the minimum observed offset, and compute
+  // the bitwise OR of each of the offsets. The number of trailing zeros
+  // in the mask gives us the log2 of the alignment of all offsets, which
+  // allows us to compress the bitset by only storing one bit per aligned
+  // address.
+  uint64_t Mask = 0;
+  for (uint64_t &Offset : Offsets) {
+    Offset -= Min;
+    Mask |= Offset;
+  }
+
+  BSI.AlignLog2 = 0;
+  // FIXME: Can probably do something smarter if all offsets are 0.
+  if (Mask != 0)
+    BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+  // Build the compressed bitset while normalizing the offsets against the
+  // computed alignment.
+  BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
+  uint64_t ByteSize = (BSI.BitSize + 7) / 8;
+  std::vector<uint8_t> Bits(ByteSize);
+  for (uint64_t Offset : Offsets) {
+    Offset >>= BSI.AlignLog2;
+    Bits[Offset / 8] |= 1 << (Offset % 8);
+  }
+
+  BSI.BitsConst = ConstantDataArray::get(M.getContext(), Bits);
+  BSI.ByteOffset = Min;
+
+  return BSI;
+}
+
+void LowerBitSets::lowerBitSetCall(CallInst *CI,
+                                   const LowerBitSets::BitSetInfo &BSI,
+                                   GlobalVariable *BitSetGlobal,
+                                   GlobalVariable *CombinedGlobal) {
+  Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
+  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+      GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+
+  Value *Ptr = CI->getArgOperand(0);
+  BasicBlock *InitialBB = CI->getParent();
+
+  IRBuilder<> B(CI);
+
+  Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+  Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+  Value *BitOffset;
+  if (BSI.AlignLog2 == 0) {
+    BitOffset = PtrOffset;
+  } else {
+    // We need to check that the offset both falls within our range and is
+    // suitably aligned. We can check both properties at the same time by
+    // performing a right rotate by log2(alignment) followed by an integer
+    // comparison against the bitset size. The rotate will move the lower
+    // order bits that need to be zero into the higher order bits of the
+    // result, causing the comparison to fail if they are nonzero. The rotate
+    // also conveniently gives us a bit offset to use during the load from
+    // the bitset.
+    Value *OffsetSHR =
+        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+    Value *OffsetSHL = B.CreateShl(
+        PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) -
+                                                  BSI.AlignLog2));
+    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+  }
+
+  Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+  TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+  IRBuilder<> ThenB(Term);
+
+  // Now that we know that the offset is in range and aligned, load the
+  // appropriate bit from the bitset. TODO: We might want to use the bt
+  // instruction with the previously computed bit offset; LLVM does not
+  // currently expose this instruction.
+  Value *BitSetGlobalOffset = ThenB.CreateLShr(
+      PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2 + 3));
+  Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), BitSetGlobalOffset};
+  Value *BitSetGlobalAddr = ThenB.CreateGEP(BitSetGlobal, Idxs);
+  Value *BitSetGlobalVal = ThenB.CreateLoad(BitSetGlobalAddr);
+
+  Value *BitSetShift = ThenB.CreateTrunc(BitOffset, Int8Ty);
+  BitSetShift = ThenB.CreateAnd(BitSetShift, ConstantInt::get(Int8Ty, 7));
+
+  Value *Bit = ThenB.CreateLShr(BitSetGlobalVal, BitSetShift);
+  Bit = ThenB.CreateTrunc(Bit, Int1Ty);
+
+  // The value we want is 0 if we came directly from the initial block
+  // (having failed the range or alignment checks), or the loaded bit if
+  // we came from the block in which we loaded it.
+  B.SetInsertPoint(CI);
+  PHINode *P = B.CreatePHI(Int1Ty, 2);
+  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+  P->addIncoming(Bit, ThenB.GetInsertBlock());
+
+  CI->replaceAllUsesWith(P);
+  CI->eraseFromParent();
+}
+
+void LowerBitSets::buildBitSetsFromGlobals(
+    Module &M,
+    const std::vector<MDString *> &BitSets,
+    const std::vector<GlobalVariable *> &Globals) {
+  // Build a new global with the combined contents of the referenced globals.
+  std::vector<Constant *> GlobalInits;
+  for (GlobalVariable *G : Globals)
+    GlobalInits.push_back(G->getInitializer());
+  Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
+  auto CombinedGlobal =
+      new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
+                         GlobalValue::PrivateLinkage, NewInit);
+
+  const StructLayout *CombinedGlobalLayout =
+      DL->getStructLayout(cast<StructType>(NewInit->getType()));
+
+  // Compute the offsets of the original globals within the new global.
+  DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+  for (unsigned I = 0; I != Globals.size(); ++I) {
+    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I);
+  }
+
+  // For each bitset in this disjoint set...
+  for (MDString *BS : BitSets) {
+    // Build the bitset.
+    BitSetInfo BSI = buildBitSet(M, BS, GlobalLayout);
+
+    // Create a global in which to store it.
+    auto BitSetGlobal = new GlobalVariable(
+        M, BSI.BitsConst->getType(), /*isConstant=*/true,
+        GlobalValue::PrivateLinkage, BSI.BitsConst, BS->getString() + ".bits");
+
+    // Lower each call to llvm.bitset.test for this bitset.
+    for (CallInst *CI : BitSetTestCallSites[BS])
+      lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal);
+  }
+
+  // Build aliases pointing to offsets into the combined global for each
+  // global from which we built the combined global, and replace references
+  // to the original globals with references to the aliases.
+  for (unsigned I = 0; I != Globals.size(); ++I) {
+    Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+                                      ConstantInt::get(Int32Ty, I)};
+    Constant *CombinedGlobalElemPtr =
+        ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
+    GlobalAlias *GAlias = GlobalAlias::create(
+        Globals[I]->getType()->getElementType(),
+        Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
+        "", CombinedGlobalElemPtr, &M);
+    GAlias->takeName(Globals[I]);
+    Globals[I]->replaceAllUsesWith(GAlias);
+    Globals[I]->eraseFromParent();
+  }
+}
+
+bool LowerBitSets::buildBitSets(Module &M) {
+  Function *BitSetTestFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+  if (!BitSetTestFunc)
+    return false;
+
+  // Equivalence class set containing bitsets and the globals they reference.
+  // This is used to partition the set of bitsets in the module into disjoint
+  // sets.
+  typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+      GlobalClassesTy;
+  GlobalClassesTy GlobalClasses;
+
+  for (const Use &U : BitSetTestFunc->uses()) {
+    auto CI = cast<CallInst>(U.getUser());
+
+    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+      report_fatal_error(
+          "Second argument of llvm.bitset.test must be metadata string");
+    auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+    // Add the call site to the list of call sites for this bit set. We also use
+    // BitSetTestCallSites to keep track of whether we have seen this bit set
+    // before. If we have, we don't need to re-add the referenced globals to the
+    // equivalence class.
+    std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+              bool> Ins =
+        BitSetTestCallSites.insert(
+            std::make_pair(BitSet, std::vector<CallInst *>()));
+    Ins.first->second.push_back(CI);
+    if (!Ins.second)
+      continue;
+
+    // Add the bitset to the equivalence class.
+    GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+    GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+    if (!BitSetNM)
+      continue;
+
+    // Verify the bitset metadata and add the referenced globals to the bitset's
+    // equivalence class.
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getNumOperands() != 3)
+        report_fatal_error(
+            "All operands of llvm.bitsets metadata must have 3 elements");
+
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+
+      auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+      if (!OpConstMD)
+        report_fatal_error("Bit set element must be a constant");
+      auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+      if (!OpGlobal)
+        report_fatal_error("Bit set element must refer to global");
+
+      auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+      if (!OffsetConstMD)
+        report_fatal_error("Bit set element offset must be a constant");
+      auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+      if (!OffsetInt)
+        report_fatal_error(
+            "Bit set element offset must be an integer constant");
+
+      CurSet = GlobalClasses.unionSets(
+          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+    }
+  }
+
+  if (GlobalClasses.empty())
+    return false;
+
+  // For each disjoint set we found...
+  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+                                 E = GlobalClasses.end();
+       I != E; ++I) {
+    if (!I->isLeader()) continue;
+
+    // Build the list of bitsets and referenced globals in this disjoint set.
+    std::vector<MDString *> BitSets;
+    std::vector<GlobalVariable *> Globals;
+    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+         MI != GlobalClasses.member_end(); ++MI) {
+      if ((*MI).is<MDString *>())
+        BitSets.push_back(MI->get<MDString *>());
+      else
+        Globals.push_back(MI->get<GlobalVariable *>());
+    }
+
+    // Order bitsets and globals by name for determinism. TODO: We may later
+    // want to use a more sophisticated ordering that lays out globals so as to
+    // minimize the sizes of the bitsets.
+    std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+      return S1->getString() < S2->getString();
+    });
+    std::sort(Globals.begin(), Globals.end(),
+              [](GlobalVariable *GV1, GlobalVariable *GV2) {
+                return GV1->getName() < GV2->getName();
+              });
+
+    // Build the bitsets from this disjoint set.
+    buildBitSetsFromGlobals(M, BitSets, Globals);
+  }
+
+  return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata(Module &M) {
+  if (!BitSetNM)
+    return false;
+
+  M.eraseNamedMetadata(BitSetNM);
+  return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+  if (!DL)
+    return false;
+
+  bool Changed = buildBitSets(M);
+  Changed |= eraseBitSetMetadata(M);
+  return Changed;
+}
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -476,6 +476,9 @@
   // currently it damages debug info.
   if (MergeFunctions)
     PM.add(createMergeFunctionsPass());
+
+  // Lower bitset constants to bitsets.
+  PM.add(createLowerBitSetsPass());
 }
 
 void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM) {
Index: test/Transforms/LowerBitSets/simple.ll
===================================================================
--- /dev/null
+++ test/Transforms/LowerBitSets/simple.ll
@@ -0,0 +1,132 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK: [[G:@[^ ]*]] = private constant { i32, i32, i32, [2 x i32] } { i32 1, i32 2, i32 3, [2 x i32] [i32 4, i32 5] }
+@a = constant i32 1
+@b = constant i32 2
+@c = constant i32 3
+@d = constant [2 x i32] [i32 4, i32 5]
+
+; Offset 0, 4 byte alignment
+; CHECK: @bitset1.bits = private constant [1 x i8] c"\13"
+!0 = !{!"bitset1", i32* @a, i32 0}
+; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0}
+!1 = !{!"bitset1", i32* @b, i32 0}
+; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @b, i32 0}
+!2 = !{!"bitset1", [2 x i32]* @d, i32 4}
+; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4}
+
+; Offset 4, 4 byte alignment
+; CHECK: @bitset2.bits = private constant [1 x i8] c"\03"
+!3 = !{!"bitset2", i32* @b, i32 0}
+; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @b, i32 0}
+!4 = !{!"bitset2", i32* @c, i32 0}
+; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}
+
+; Offset 0, 8 byte alignment
+; CHECK: @bitset3.bits = private constant [1 x i8] c"\03"
+!5 = !{!"bitset3", i32* @a, i32 0}
+; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
+!6 = !{!"bitset3", i32* @c, i32 0}
+; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @c, i32 0}
+
+; Entries whose second operand is null (the result of a global being DCE'd)
+; should be ignored.
+!7 = !{!"bitset2", null, i32 0}
+
+!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
+
+; CHECK: @a = alias getelementptr inbounds ({ i32, i32, i32, [2 x i32] }* [[G]], i32 0, i32 0)
+; CHECK: @b = alias getelementptr inbounds ({ i32, i32, i32, [2 x i32] }* [[G]], i32 0, i32 1)
+; CHECK: @c = alias getelementptr inbounds ({ i32, i32, i32, [2 x i32] }* [[G]], i32 0, i32 2)
+; CHECK: @d = alias getelementptr inbounds ({ i32, i32, i32, [2 x i32] }* [[G]], i32 0, i32 3)
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: @foo(i32* [[A0:%[^ ]*]])
+define i1 @foo(i32* %p) {
+  ; CHECK-NOT: llvm.bitset.test
+
+  ; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32
+  ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2
+  ; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30
+  ; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]
+  ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 5
+  ; CHECK: br i1 [[R6]]
+
+  ; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R2]], 5
+  ; CHECK: [[R9:%[^ ]*]] = getelementptr [1 x i8]* @bitset1.bits, i32 0, i32 [[R8]]
+  ; CHECK: [[R10:%[^ ]*]] = load i8* [[R9]]
+  ; CHECK: [[R11:%[^ ]*]] = trunc i32 [[R5]] to i8
+  ; CHECK: [[R12:%[^ ]*]] = and i8 [[R11]], 7
+  ; CHECK: [[R13:%[^ ]*]] = lshr i8 [[R10]], [[R12]]
+  ; CHECK: [[R14:%[^ ]*]] = trunc i8 [[R13]] to i1
+
+  ; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R14]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+
+  ; CHECK-NOT: llvm.bitset.test
+  %y = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+
+  ; CHECK: ret i1 [[R16]]
+  ret i1 %x
+}
+
+; CHECK: @bar(i32* [[B0:%[^ ]*]])
+define i1 @bar(i32* %p) {
+  ; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32
+  ; CHECK: [[S2:%[^ ]*]] =
sub i32 [[S1]], add (i32 ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32), i32 4)
+  ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 2
+  ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 30
+  ; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]]
+  ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2
+  ; CHECK: br i1 [[S6]]
+
+  ; CHECK: [[S8:%[^ ]*]] = lshr i32 [[S2]], 5
+  ; CHECK: [[S9:%[^ ]*]] = getelementptr [1 x i8]* @bitset2.bits, i32 0, i32 [[S8]]
+  ; CHECK: [[S10:%[^ ]*]] = load i8* [[S9]]
+  ; CHECK: [[S11:%[^ ]*]] = trunc i32 [[S5]] to i8
+  ; CHECK: [[S12:%[^ ]*]] = and i8 [[S11]], 7
+  ; CHECK: [[S13:%[^ ]*]] = lshr i8 [[S10]], [[S12]]
+  ; CHECK: [[S14:%[^ ]*]] = trunc i8 [[S13]] to i1
+
+  ; CHECK: [[S16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[S14]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
+  ; CHECK: ret i1 [[S16]]
+  ret i1 %x
+}
+
+; CHECK: @baz(i32* [[C0:%[^ ]*]])
+define i1 @baz(i32* %p) {
+  ; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32
+  ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, i32, i32, [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 3
+  ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 29
+  ; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]
+  ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 2
+  ; CHECK: br i1 [[T6]]
+
+  ; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T2]], 6
+  ; CHECK: [[T9:%[^ ]*]] = getelementptr [1 x i8]* @bitset3.bits, i32 0, i32 [[T8]]
+  ; CHECK: [[T10:%[^ ]*]] = load i8* [[T9]]
+  ; CHECK: [[T11:%[^ ]*]] = trunc i32 [[T5]] to i8
+  ; CHECK: [[T12:%[^ ]*]] = and i8 [[T11]], 7
+  ; CHECK: [[T13:%[^ ]*]] = lshr i8 [[T10]], [[T12]]
+  ; CHECK: [[T14:%[^ ]*]] = trunc i8 [[T13]] to i1
+
+  ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T14]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
+  ; CHECK: ret i1 [[T16]]
+  ret i1 %x
+}
+
+; CHECK-NOT: !llvm.bitsets