Diff 21059

include/llvm/Transforms/IPO/LowerBitSets.h

Show All 24 Lines

namespace llvm {		namespace llvm {

class DataLayout;		class DataLayout;
class GlobalVariable;		class GlobalVariable;
class Value;		class Value;

struct BitSetInfo {		struct BitSetInfo {
// The actual bitset.		// The indices of the set bits in the bitset.
std::vector<uint8_t> Bits;		std::set<uint64_t> Bits;
		kccUnsubmitted Not Done Reply Inline Actions Is this better than unordered_set? (I don't know, just asking) kcc: Is this better than unordered_set? (I don't know, just asking)
		pccAuthorUnsubmitted Not Done Reply Inline Actions Maybe, not sure though. DenseSet may be even better here. But I'd rather wait until this pass isn't a small fraction of the overall compile time before trying to optimize our data structures. pcc: Maybe, not sure though. DenseSet may be even better here. But I'd rather wait until this pass…

// The byte offset into the combined global represented by the bitset.		// The byte offset into the combined global represented by the bitset.
uint64_t ByteOffset;		uint64_t ByteOffset;

// The size of the bitset in bits.		// The size of the bitset in bits.
uint64_t BitSize;		uint64_t BitSize;

// Log2 alignment of the bit set relative to the combined global.		// Log2 alignment of the bit set relative to the combined global.
// For example, a log2 alignment of 3 means that bits in the bitset		// For example, a log2 alignment of 3 means that bits in the bitset
// represent addresses 8 bytes apart.		// represent addresses 8 bytes apart.
unsigned AlignLog2;		unsigned AlignLog2;

bool isSingleOffset() const {		bool isSingleOffset() const {
return Bits.size() == 1 && Bits[0] == 1;		return Bits.size() == 1;
}		}

bool isAllOnes() const {		bool isAllOnes() const {
for (unsigned I = 0; I != Bits.size() - 1; ++I)		return Bits.size() == BitSize;
if (Bits[I] != 0xFF)
return false;

if (BitSize % 8 == 0)
return Bits[Bits.size() - 1] == 0xFF;

return Bits[Bits.size() - 1] == (1 << (BitSize % 8)) - 1;
}		}

bool containsGlobalOffset(uint64_t Offset) const;		bool containsGlobalOffset(uint64_t Offset) const;

bool containsValue(const DataLayout *DL,		bool containsValue(const DataLayout *DL,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout,		const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout,
Value *V, uint64_t COffset = 0) const;		Value *V, uint64_t COffset = 0) const;

};		};

struct BitSetBuilder {		struct BitSetBuilder {
SmallVector<uint64_t, 16> Offsets;		SmallVector<uint64_t, 16> Offsets;
uint64_t Min, Max;		uint64_t Min, Max;

BitSetBuilder() : Min(std::numeric_limits<uint64_t>::max()), Max(0) {}		BitSetBuilder() : Min(std::numeric_limits<uint64_t>::max()), Max(0) {}

▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	GlobalLayoutBuilder(uint64_t NumObjects)
: Fragments(1), FragmentMap(NumObjects) {}		: Fragments(1), FragmentMap(NumObjects) {}

/// Add F to the layout while trying to keep its indices contiguous.		/// Add F to the layout while trying to keep its indices contiguous.
/// If a previously seen fragment uses any of F's indices, that		/// If a previously seen fragment uses any of F's indices, that
/// fragment will be laid out inside F.		/// fragment will be laid out inside F.
void addFragment(const std::set<uint64_t> &F);		void addFragment(const std::set<uint64_t> &F);
};		};

		/// This class is used to build a byte array containing overlapping bit sets. By
		/// loading from indexed offsets into the byte array and applying a mask, a
		/// program can test bits from the bit set with a relatively short instruction
		/// sequence. For example, suppose we have 15 bit sets to lay out:
		///
		/// A (16 bits), B (15 bits), C (14 bits), D (13 bits), E (12 bits),
		/// F (11 bits), G (10 bits), H (9 bits), I (7 bits), J (6 bits), K (5 bits),
		/// L (4 bits), M (3 bits), N (2 bits), O (1 bit)
		///
		/// These bits can be laid out in a 16-byte array like this:
		///
		/// Byte Offset
		/// 0123456789ABCDEF
		/// Bit
		/// 7 HHHHHHHHHIIIIIII
		/// 6 GGGGGGGGGGJJJJJJ
		/// 5 FFFFFFFFFFFKKKKK
		/// 4 EEEEEEEEEEEELLLL
		/// 3 DDDDDDDDDDDDDMMM
		/// 2 CCCCCCCCCCCCCCNN
		/// 1 BBBBBBBBBBBBBBBO
		/// 0 AAAAAAAAAAAAAAAA
		///
		/// For example, to test bit X of A, we evaluate ((bits[X] & 1) != 0), or to
		/// test bit X of I, we evaluate ((bits[9 + X] & 0x80) != 0). This can be done
		/// in 1-2 machine instructions on x86, or 4-6 instructions on ARM.
		///
		/// This is a byte array, rather than (say) a 2-byte array or a 4-byte array,
		/// because for one thing it gives us better packing (the more bins there are,
		/// the less evenly they will be filled), and for another, the instruction
		/// sequences can be slightly shorter, both on x86 and ARM.
		struct ByteArrayBuilder {
		jfbUnsubmitted Not Done Reply Inline Actions Could you add a symbolic constant for 8? Also, why 8? Is it good for x86 but not as good for ARM? jfb: Could you add a symbolic constant for 8? Also, why 8? Is it good for x86 but not as good for…
		pccAuthorUnsubmitted Not Done Reply Inline Actions Are you asking why I am using 8 bits per array element? Well, for one thing it gives us better packing (the more bins there are, the less evenly they will be filled). For another, the instruction sequences can be slightly shorter, both on x86 and ARM. For example, this code: typedef unsigned long T; typedef void (F)(void); T table; unsigned char bitset[1000]; void FOO(T obj) { T vptr = obj; T ptr_offset = (T)vptr - (T)table; T t1 = ptr_offset << (sizeof(void)8 - 3); T t2 = ptr_offset >> 3; T t3 = t1 \| t2; unsigned bits = bitset[t3]; if ((t3 >= TABLE_SIZE) \|\| (bits & 1)) __builtin_trap(); F f = (F)vptr[4]; f(); } is one byte shorter (on x86) or one instruction shorter (on ARM) than this code: typedef unsigned long T; typedef void (F)(void); T table; unsigned short bitset[1000]; void FOO(T *obj) { T vptr = obj; T ptr_offset = (T)vptr - (T)table; T t1 = ptr_offset << (sizeof(void)8 - 3); T t2 = ptr_offset >> 3; T t3 = t1 \| t2; unsigned bits = bitset[t3]; if ((t3 >= TABLE_SIZE) \|\| (bits & 0x100)) __builtin_trap(); F f = (F)vptr[4]; f(); } I guess I could parameterise this but it seems like it would add more complexity than we need. Happy to document the choice of 8 bits per array element. pcc:* Are you asking why I am using 8 bits per array element? Well, for one thing it gives us better…
		jfbUnsubmitted Not Done Reply Inline Actions Yes, documentation for the choice of 8 is good :) jfb: Yes, documentation for the choice of 8 is good :)
		pccAuthorUnsubmitted Not Done Reply Inline Actions Done pcc: Done
		/// The byte array built so far.
		std::vector<uint8_t> Bytes;

		/// The number of bytes allocated so far for each of the bits.
		jfbUnsubmitted Not Done Reply Inline Actions `memset` or `std::fill`. jfb: `memset` or `std::fill`.
		pccAuthorUnsubmitted Not Done Reply Inline Actions Done pcc: Done
		uint64_t BitAllocs[8];
		jfbUnsubmitted Not Done Reply Inline Actions I'd still rather have the 8 pulled out as its own symbolic constant, instead of being used here and below as a magic number. jfb: I'd still rather have the 8 pulled out as its own symbolic constant, instead of being used here…
		pccAuthorUnsubmitted Not Done Reply Inline Actions done pcc: done

		ByteArrayBuilder() {
		memset(BitAllocs, 0, sizeof(BitAllocs));
		}
		kccUnsubmitted Not Done Reply Inline Actions LPT == longest process time? Maybe mention the full name here? kcc: LPT == longest process time? Maybe mention the full name here?
		pccAuthorUnsubmitted Not Done Reply Inline Actions Done pcc: Done

		/// Allocate BitSize bits in the byte array where Bits contains the bits to
		/// set. AllocByteOffset is set to the offset within the byte array and
		/// AllocMask is set to the bitmask for those bits. This uses the LPT (Longest
		/// Processing Time) multiprocessor scheduling algorithm to lay out the bits
		/// efficiently; the pass allocates bit sets in decreasing size order.
		void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
		uint64_t &AllocByteOffset, uint8_t &AllocMask);
		};

} // namespace llvm		} // namespace llvm

#endif		#endif

lib/Transforms/IPO/LowerBitSets.cpp

Show All 25 Lines
#include "llvm/IR/Operator.h"		#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"		#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"		#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;		using namespace llvm;

#define DEBUG_TYPE "lowerbitsets"		#define DEBUG_TYPE "lowerbitsets"

STATISTIC(NumBitSetsCreated, "Number of bitsets created");		STATISTIC(ByteArraySizeBits, "Byte array size in bits");
		STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
		STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
		kccUnsubmitted Not Done Reply Inline Actions It would be interesting to see more detailed stats here, e.g. how many bits were packed into how many bytes. kcc: It would be interesting to see more detailed stats here, e.g. how many bits were packed into…
		jfbUnsubmitted Not Done Reply Inline Actions +1 How many bits are unused (didn't fit) would be interesting. jfb: +1 How many bits are unused (didn't fit) would be interesting.
		pccAuthorUnsubmitted Not Done Reply Inline Actions I've added some more stats here. pcc: I've added some more stats here.
STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");		STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");		STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");

bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {		bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
if (Offset < ByteOffset)		if (Offset < ByteOffset)
return false;		return false;

if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)		if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
return false;		return false;

uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;		uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
if (BitOffset >= BitSize)		if (BitOffset >= BitSize)
return false;		return false;

return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1;		return Bits.count(BitOffset);
}		}

bool BitSetInfo::containsValue(		bool BitSetInfo::containsValue(
const DataLayout *DL,		const DataLayout *DL,
const DenseMap<GlobalVariable , uint64_t> &GlobalLayout, Value V,		const DenseMap<GlobalVariable , uint64_t> &GlobalLayout, Value V,
uint64_t COffset) const {		uint64_t COffset) const {
if (auto GV = dyn_cast<GlobalVariable>(V)) {		if (auto GV = dyn_cast<GlobalVariable>(V)) {
auto I = GlobalLayout.find(GV);		auto I = GlobalLayout.find(GV);
Show All 38 Lines	for (uint64_t &Offset : Offsets) {
Offset -= Min;		Offset -= Min;
Mask \|= Offset;		Mask \|= Offset;
}		}

BitSetInfo BSI;		BitSetInfo BSI;
BSI.ByteOffset = Min;		BSI.ByteOffset = Min;

BSI.AlignLog2 = 0;		BSI.AlignLog2 = 0;
// FIXME: Can probably do something smarter if all offsets are 0.
if (Mask != 0)		if (Mask != 0)
BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);		BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);

// Build the compressed bitset while normalizing the offsets against the		// Build the compressed bitset while normalizing the offsets against the
// computed alignment.		// computed alignment.
BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;		BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
uint64_t ByteSize = (BSI.BitSize + 7) / 8;
BSI.Bits.resize(ByteSize);
for (uint64_t Offset : Offsets) {		for (uint64_t Offset : Offsets) {
Offset >>= BSI.AlignLog2;		Offset >>= BSI.AlignLog2;
BSI.Bits[Offset / 8] \|= 1 << (Offset % 8);		BSI.Bits.insert(Offset);
}		}

return BSI;		return BSI;
}		}

void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {		void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
// Create a new fragment to hold the layout for F.		// Create a new fragment to hold the layout for F.
Fragments.emplace_back();		Fragments.emplace_back();
Show All 18 Lines	for (auto ObjIndex : F) {
}		}
}		}

// Update the fragment map to point our object indices to this fragment.		// Update the fragment map to point our object indices to this fragment.
for (uint64_t ObjIndex : Fragment)		for (uint64_t ObjIndex : Fragment)
FragmentMap[ObjIndex] = FragmentIndex;		FragmentMap[ObjIndex] = FragmentIndex;
}		}

		/// Reserve BitSize consecutive bytes of the shared byte array for one bit set
		/// and record that bit set's members in them.
		///
		/// Each of the eight bit positions of a byte tracks its own allocation in
		/// BitAllocs; the bit position with the smallest allocation so far is chosen
		/// (ties go to the lowest position) and extended by BitSize bytes.
		///
		/// Bits holds the indices of the set bits, relative to the start of the
		/// reservation. This function assumes every index is below BitSize and does
		/// not check it. On return AllocByteOffset is the offset of the reservation
		/// within Bytes and AllocMask is the single-bit mask selecting this bit set
		/// inside each reserved byte.
		void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
		                                uint64_t BitSize, uint64_t &AllocByteOffset,
		                                uint8_t &AllocMask) {
		  // Find the smallest current allocation.
		  unsigned Bit = 0;
		  for (unsigned I = 1; I != 8; ++I)
		jfbUnsubmitted Not Done Reply Inline Actions Update 8. jfb: Update 8.
		pccAuthorUnsubmitted Not Done Reply Inline Actions See my other comment. pcc: See my other comment.
		    if (BitAllocs[I] < BitAllocs[Bit])
		      Bit = I;

		  AllocByteOffset = BitAllocs[Bit];

		  // Add our size to it. Keep the sum 64 bits wide: BitAllocs, the offset and
		  // the size are all uint64_t, so a narrower temporary would silently
		  // truncate the new allocation mark.
		  uint64_t ReqSize = AllocByteOffset + BitSize;
		  BitAllocs[Bit] = ReqSize;
		  if (Bytes.size() < ReqSize)
		    Bytes.resize(ReqSize);

		  // Set our bits.
		  AllocMask = 1 << Bit;
		  for (uint64_t B : Bits)
		    Bytes[AllocByteOffset + B] |= AllocMask;
		}

namespace {		namespace {

		/// Bookkeeping for one bit set that is stored in the shared byte array. A
		/// record is filled in by createByteArray() and resolved by
		/// allocateByteArrays().
		struct ByteArrayInfo {
		/// Indices of the set bits, copied from BitSetInfo::Bits.
		std::set<uint64_t> Bits;
		/// Size of the bit set in bits, copied from BitSetInfo::BitSize.
		uint64_t BitSize;
		/// Placeholder global standing in for this bit set's position in the byte
		/// array; allocateByteArrays() replaces its uses with an alias and erases it.
		GlobalVariable *ByteArray;
		/// ptrtoint of a placeholder global standing in for the byte mask;
		/// allocateByteArrays() replaces its uses with the real i8 mask constant.
		Constant *Mask;
		};

struct LowerBitSets : public ModulePass {		struct LowerBitSets : public ModulePass {
static char ID;		static char ID;
LowerBitSets() : ModulePass(ID) {		LowerBitSets() : ModulePass(ID) {
initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());		initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
}		}

		Module *M;

const DataLayout *DL;		const DataLayout *DL;
IntegerType *Int1Ty;		IntegerType *Int1Ty;
IntegerType *Int8Ty;		IntegerType *Int8Ty;
IntegerType *Int32Ty;		IntegerType *Int32Ty;
Type *Int32PtrTy;		Type *Int32PtrTy;
IntegerType *Int64Ty;		IntegerType *Int64Ty;
Type *IntPtrTy;		Type *IntPtrTy;

// The llvm.bitsets named metadata.		// The llvm.bitsets named metadata.
NamedMDNode *BitSetNM;		NamedMDNode *BitSetNM;

// Mapping from bitset mdstrings to the call sites that test them.		// Mapping from bitset mdstrings to the call sites that test them.
DenseMap<MDString , std::vector<CallInst >> BitSetTestCallSites;		DenseMap<MDString , std::vector<CallInst >> BitSetTestCallSites;

		std::vector<ByteArrayInfo> ByteArrayInfos;

BitSetInfo		BitSetInfo
buildBitSet(MDString *BitSet,		buildBitSet(MDString *BitSet,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);		const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,		ByteArrayInfo *createByteArray(BitSetInfo &BSI);
GlobalVariable BitSetGlobal, Value BitOffset);		void allocateByteArrays();
		Value createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo &BAI,
		Value *BitOffset);
Value *		Value *
lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,		lowerBitSetCall(CallInst CI, BitSetInfo &BSI, ByteArrayInfo &BAI,
GlobalVariable BitSetGlobal, GlobalVariable CombinedGlobal,		GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);		const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
void buildBitSetsFromGlobals(Module &M,		void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
const std::vector<MDString *> &BitSets,
const std::vector<GlobalVariable *> &Globals);		const std::vector<GlobalVariable *> &Globals);
bool buildBitSets(Module &M);		bool buildBitSets();
bool eraseBitSetMetadata(Module &M);		bool eraseBitSetMetadata();

bool doInitialization(Module &M) override;		bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;		bool runOnModule(Module &M) override;
};		};

} // namespace		} // namespace

INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",		INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
"Lower bitset metadata", false, false)		"Lower bitset metadata", false, false)
INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",		INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
"Lower bitset metadata", false, false)		"Lower bitset metadata", false, false)
char LowerBitSets::ID = 0;		char LowerBitSets::ID = 0;

ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }		ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }

bool LowerBitSets::doInitialization(Module &M) {		bool LowerBitSets::doInitialization(Module &Mod) {
DL = M.getDataLayout();		M = &Mod;

		DL = M->getDataLayout();
if (!DL)		if (!DL)
report_fatal_error("Data layout required");		report_fatal_error("Data layout required");

Int1Ty = Type::getInt1Ty(M.getContext());		Int1Ty = Type::getInt1Ty(M->getContext());
Int8Ty = Type::getInt8Ty(M.getContext());		Int8Ty = Type::getInt8Ty(M->getContext());
Int32Ty = Type::getInt32Ty(M.getContext());		Int32Ty = Type::getInt32Ty(M->getContext());
Int32PtrTy = PointerType::getUnqual(Int32Ty);		Int32PtrTy = PointerType::getUnqual(Int32Ty);
Int64Ty = Type::getInt64Ty(M.getContext());		Int64Ty = Type::getInt64Ty(M->getContext());
IntPtrTy = DL->getIntPtrType(M.getContext(), 0);		IntPtrTy = DL->getIntPtrType(M->getContext(), 0);

BitSetNM = M.getNamedMetadata("llvm.bitsets");		BitSetNM = M->getNamedMetadata("llvm.bitsets");

BitSetTestCallSites.clear();		BitSetTestCallSites.clear();

return false;		return false;
}		}

/// Build a bit set for BitSet using the object layouts in		/// Build a bit set for BitSet using the object layouts in
/// GlobalLayout.		/// GlobalLayout.
Show All 32 Lines	static Value createMaskedBitTest(IRBuilder<> &B, Value Bits,
BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);		BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
Value *BitIndex =		Value *BitIndex =
B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));		B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);		Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
Value *MaskedBits = B.CreateAnd(Bits, BitMask);		Value *MaskedBits = B.CreateAnd(Bits, BitMask);
return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));		return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
}		}

		/// Record that BSI needs a slot in the shared byte array and return the
		/// bookkeeping entry for it.
		///
		/// Two private, constant, uninitialized i8 globals are created as placeholders:
		/// one for the address of the bit set's bytes and one (wrapped in a ptrtoint)
		/// for its mask. The entry also receives copies of BSI's bits and bit size.
		///
		/// The returned pointer refers to an element of ByteArrayInfos. It is
		/// invalidated by the next call to this function (the vector may reallocate)
		/// and by allocateByteArrays() (which sorts the vector), so callers must not
		/// keep it across either.
		ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
		  // Create globals to stand in for byte arrays and masks. These never actually
		  // get initialized, we RAUW and erase them later in allocateByteArrays() once
		jfbUnsubmitted Not Done Reply Inline Actions Capitalize RAUW. jfb: Capitalize RAUW.
		pccAuthorUnsubmitted Not Done Reply Inline Actions Done pcc: Done
		  // we know the offset and mask to use.
		  auto ByteArrayGlobal = new GlobalVariable(
		      *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
		  auto MaskGlobal = new GlobalVariable(
		      *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);

		  ByteArrayInfos.emplace_back();
		  ByteArrayInfo *BAI = &ByteArrayInfos.back();

		  BAI->Bits = BSI.Bits;
		  BAI->BitSize = BSI.BitSize;
		  BAI->ByteArray = ByteArrayGlobal;
		  BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
		  return BAI;
		}

		/// Lay out every bit set recorded in ByteArrayInfos inside one shared byte
		/// array and resolve the placeholder globals made by createByteArray().
		///
		/// Steps:
		///  1. Sort the entries by decreasing BitSize (stable, so equal sizes keep
		///     their creation order), which is the order the LPT layout in
		///     ByteArrayBuilder::allocate() expects.
		///  2. Allocate each entry, replace its placeholder mask with the real i8 mask
		///     constant and erase the placeholder mask global.
		///  3. Emit the finished byte array as a private constant global.
		///  4. Replace each placeholder byte-array global with a private alias to the
		///     entry's offset inside that global, then erase the placeholder.
		///  5. Publish the size statistics.
		void LowerBitSets::allocateByteArrays() {
		  std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
		                   [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
		                     return BAI1.BitSize > BAI2.BitSize;
		                   });

		  // Byte offset assigned to each entry, indexed like ByteArrayInfos.
		  std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());

		  ByteArrayBuilder BAB;
		  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
		    ByteArrayInfo *BAI = &ByteArrayInfos[I];

		    uint8_t Mask;
		    BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);

		    // BAI->Mask is a ptrtoint of the placeholder mask global; swap in the real
		    // constant, then drop the placeholder (operand 0 of the ptrtoint).
		    BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
		    cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
		  }

		  Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
		  auto ByteArray =
		      new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
		                         GlobalValue::PrivateLinkage, ByteArrayConst);

		  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
		    ByteArrayInfo *BAI = &ByteArrayInfos[I];

		    Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
		                        ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
		    Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs);

		    // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
		    // that the pc-relative displacement is folded into the lea instead of the
		    // test instruction getting another displacement.
		    GlobalAlias *Alias = GlobalAlias::create(
		        Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
		    BAI->ByteArray->replaceAllUsesWith(Alias);
		    BAI->ByteArray->eraseFromParent();
		  }

		  // Total bits handed out is the sum of the per-bit-position allocations.
		  uint64_t AllocatedBits = 0;
		  for (uint64_t Alloc : BAB.BitAllocs)
		    AllocatedBits += Alloc;
		  ByteArraySizeBits = AllocatedBits;
		  ByteArraySizeBytes = BAB.Bytes.size();
		}

/// Build a test that bit BitOffset is set in BSI, where		/// Build a test that bit BitOffset is set in BSI, where
/// BitSetGlobal is a global containing the bits in BSI.		/// BitSetGlobal is a global containing the bits in BSI.
Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,		Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
GlobalVariable *BitSetGlobal,		ByteArrayInfo &BAI, Value BitOffset) {
Value *BitOffset) {		if (BSI.BitSize <= 64) {
		jfbUnsubmitted Not Done Reply Inline Actions Just to be sure I understand: this inlines small (<= 64 bit) bitsets. The current patch only benefits from co-locating larger bitsets, right? It would probably be good for the docs to specify this double approach to reducing bitset footprint. jfb: Just to be sure I understand: this inlines small (<= 64 bit) bitsets. The current patch only…
		pccAuthorUnsubmitted Not Done Reply Inline Actions We already document the technique for inlining <=64 bits: http://clang.llvm.org/docs/ControlFlowIntegrityDesign.html#short-inline-bit-vectors and I do plan to document byte arrays on that page once this lands. pcc: We already document the technique for inlining <=64 bits: http://clang.llvm.
		jfbUnsubmitted Not Done Reply Inline Actions sgtm jfb: sgtm
if (BSI.Bits.size() <= 8) {
// If the bit set is sufficiently small, we can avoid a load by bit testing		// If the bit set is sufficiently small, we can avoid a load by bit testing
// a constant.		// a constant.
IntegerType *BitsTy;		IntegerType *BitsTy;
if (BSI.Bits.size() <= 4)		if (BSI.BitSize <= 32)
BitsTy = Int32Ty;		BitsTy = Int32Ty;
else		else
BitsTy = Int64Ty;		BitsTy = Int64Ty;

uint64_t Bits = 0;		uint64_t Bits = 0;
for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) {		for (auto Bit : BSI.Bits)
Bits <<= 8;		Bits \|= uint64_t(1) << Bit;
Bits \|= *I;
}
Constant *BitsConst = ConstantInt::get(BitsTy, Bits);		Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
return createMaskedBitTest(B, BitsConst, BitOffset);		return createMaskedBitTest(B, BitsConst, BitOffset);
} else {		} else {
// TODO: We might want to use the memory variant of the bt instruction		if (!BAI) {
// with the previously computed bit offset at -Os. This instruction does		++NumByteArraysCreated;
// exactly what we want but has been benchmarked as being slower than open		BAI = createByteArray(BSI);
// coding the load+bt.		}
Value *BitSetGlobalOffset =
B.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5));		Value *ByteAddr = B.CreateGEP(BAI->ByteArray, BitOffset);
Value *BitSetEntryAddr = B.CreateGEP(		Value *Byte = B.CreateLoad(ByteAddr);
ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset);
Value *BitSetEntry = B.CreateLoad(BitSetEntryAddr);

return createMaskedBitTest(B, BitSetEntry, BitOffset);		Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
		return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
}		}
}		}

/// Lower a llvm.bitset.test call to its implementation. Returns the value to		/// Lower a llvm.bitset.test call to its implementation. Returns the value to
/// replace the call with.		/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(		Value *LowerBitSets::lowerBitSetCall(
CallInst CI, const BitSetInfo &BSI, GlobalVariable BitSetGlobal,		CallInst CI, BitSetInfo &BSI, ByteArrayInfo &BAI,
GlobalVariable *CombinedGlobal,		GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {		const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);		Value *Ptr = CI->getArgOperand(0);

if (BSI.containsValue(DL, GlobalLayout, Ptr))		if (BSI.containsValue(DL, GlobalLayout, Ptr))
return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext());		return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());

Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);		Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(		Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));		GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));

BasicBlock *InitialBB = CI->getParent();		BasicBlock *InitialBB = CI->getParent();

IRBuilder<> B(CI);		IRBuilder<> B(CI);
Show All 32 Lines	Value *LowerBitSets::lowerBitSetCall(
if (BSI.isAllOnes())		if (BSI.isAllOnes())
return OffsetInRange;		return OffsetInRange;

TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);		TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
IRBuilder<> ThenB(Term);		IRBuilder<> ThenB(Term);

// Now that we know that the offset is in range and aligned, load the		// Now that we know that the offset is in range and aligned, load the
// appropriate bit from the bitset.		// appropriate bit from the bitset.
Value *Bit = createBitSetTest(ThenB, BSI, BitSetGlobal, BitOffset);		Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);

// The value we want is 0 if we came directly from the initial block		// The value we want is 0 if we came directly from the initial block
// (having failed the range or alignment checks), or the loaded bit if		// (having failed the range or alignment checks), or the loaded bit if
// we came from the block in which we loaded it.		// we came from the block in which we loaded it.
B.SetInsertPoint(CI);		B.SetInsertPoint(CI);
PHINode *P = B.CreatePHI(Int1Ty, 2);		PHINode *P = B.CreatePHI(Int1Ty, 2);
P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);		P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
P->addIncoming(Bit, ThenB.GetInsertBlock());		P->addIncoming(Bit, ThenB.GetInsertBlock());
return P;		return P;
}		}

/// Given a disjoint set of bitsets and globals, layout the globals, build the		/// Given a disjoint set of bitsets and globals, layout the globals, build the
/// bit sets and lower the llvm.bitset.test calls.		/// bit sets and lower the llvm.bitset.test calls.
void LowerBitSets::buildBitSetsFromGlobals(		void LowerBitSets::buildBitSetsFromGlobals(
Module &M,
const std::vector<MDString *> &BitSets,		const std::vector<MDString *> &BitSets,
const std::vector<GlobalVariable *> &Globals) {		const std::vector<GlobalVariable *> &Globals) {
// Build a new global with the combined contents of the referenced globals.		// Build a new global with the combined contents of the referenced globals.
std::vector<Constant *> GlobalInits;		std::vector<Constant *> GlobalInits;
for (GlobalVariable *G : Globals) {		for (GlobalVariable *G : Globals) {
GlobalInits.push_back(G->getInitializer());		GlobalInits.push_back(G->getInitializer());
uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType());		uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType());

// Compute the amount of padding required to align the next element to the		// Compute the amount of padding required to align the next element to the
// next power of 2.		// next power of 2.
uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;		uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;

// Cap at 128 was found experimentally to have a good data/instruction		// Cap at 128 was found experimentally to have a good data/instruction
// overhead tradeoff.		// overhead tradeoff.
if (Padding > 128)		if (Padding > 128)
Padding = RoundUpToAlignment(InitSize, 128) - InitSize;		Padding = RoundUpToAlignment(InitSize, 128) - InitSize;

GlobalInits.push_back(		GlobalInits.push_back(
ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));		ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
}		}
if (!GlobalInits.empty())		if (!GlobalInits.empty())
GlobalInits.pop_back();		GlobalInits.pop_back();
Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);		Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
auto CombinedGlobal =		auto CombinedGlobal =
new GlobalVariable(M, NewInit->getType(), /isConstant=/true,		new GlobalVariable(M, NewInit->getType(), /isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);		GlobalValue::PrivateLinkage, NewInit);

const StructLayout *CombinedGlobalLayout =		const StructLayout *CombinedGlobalLayout =
DL->getStructLayout(cast<StructType>(NewInit->getType()));		DL->getStructLayout(cast<StructType>(NewInit->getType()));

// Compute the offsets of the original globals within the new global.		// Compute the offsets of the original globals within the new global.
DenseMap<GlobalVariable *, uint64_t> GlobalLayout;		DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
for (unsigned I = 0; I != Globals.size(); ++I)		for (unsigned I = 0; I != Globals.size(); ++I)
// Multiply by 2 to account for padding elements.		// Multiply by 2 to account for padding elements.
GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);		GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);

// For each bitset in this disjoint set...		// For each bitset in this disjoint set...
for (MDString *BS : BitSets) {		for (MDString *BS : BitSets) {
// Build the bitset.		// Build the bitset.
BitSetInfo BSI = buildBitSet(BS, GlobalLayout);		BitSetInfo BSI = buildBitSet(BS, GlobalLayout);

// Create a global in which to store it.		ByteArrayInfo *BAI = 0;
++NumBitSetsCreated;
Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits);
auto BitSetGlobal = new GlobalVariable(
M, BitsConst->getType(), /isConstant=/true,
GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits");

// Lower each call to llvm.bitset.test for this bitset.		// Lower each call to llvm.bitset.test for this bitset.
for (CallInst *CI : BitSetTestCallSites[BS]) {		for (CallInst *CI : BitSetTestCallSites[BS]) {
++NumBitSetCallsLowered;		++NumBitSetCallsLowered;
Value *Lowered =		Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
CI->replaceAllUsesWith(Lowered);		CI->replaceAllUsesWith(Lowered);
CI->eraseFromParent();		CI->eraseFromParent();
}		}
}		}

// Build aliases pointing to offsets into the combined global for each		// Build aliases pointing to offsets into the combined global for each
// global from which we built the combined global, and replace references		// global from which we built the combined global, and replace references
// to the original globals with references to the aliases.		// to the original globals with references to the aliases.
for (unsigned I = 0; I != Globals.size(); ++I) {		for (unsigned I = 0; I != Globals.size(); ++I) {
// Multiply by 2 to account for padding elements.		// Multiply by 2 to account for padding elements.
Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),		Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, I * 2)};		ConstantInt::get(Int32Ty, I * 2)};
Constant *CombinedGlobalElemPtr =		Constant *CombinedGlobalElemPtr =
ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);		ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
GlobalAlias *GAlias = GlobalAlias::create(		GlobalAlias *GAlias = GlobalAlias::create(
Globals[I]->getType()->getElementType(),		Globals[I]->getType()->getElementType(),
Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),		Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
"", CombinedGlobalElemPtr, &M);		"", CombinedGlobalElemPtr, M);
GAlias->takeName(Globals[I]);		GAlias->takeName(Globals[I]);
Globals[I]->replaceAllUsesWith(GAlias);		Globals[I]->replaceAllUsesWith(GAlias);
Globals[I]->eraseFromParent();		Globals[I]->eraseFromParent();
}		}
}		}

/// Lower all bit sets in this module.		/// Lower all bit sets in this module.
bool LowerBitSets::buildBitSets(Module &M) {		bool LowerBitSets::buildBitSets() {
Function *BitSetTestFunc =		Function *BitSetTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));		M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
if (!BitSetTestFunc)		if (!BitSetTestFunc)
return false;		return false;

// Equivalence class set containing bitsets and the globals they reference.		// Equivalence class set containing bitsets and the globals they reference.
// This is used to partition the set of bitsets in the module into disjoint		// This is used to partition the set of bitsets in the module into disjoint
// sets.		// sets.
typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>		typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
GlobalClassesTy;		GlobalClassesTy;
▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines	for (auto &&F : GLB.Fragments)
*OGI++ = Globals[Offset];		*OGI++ = Globals[Offset];

// Order bitsets by name for determinism.		// Order bitsets by name for determinism.
std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {		std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
return S1->getString() < S2->getString();		return S1->getString() < S2->getString();
});		});

// Build the bitsets from this disjoint set.		// Build the bitsets from this disjoint set.
buildBitSetsFromGlobals(M, BitSets, OrderedGlobals);		buildBitSetsFromGlobals(BitSets, OrderedGlobals);
}		}

		allocateByteArrays();

return true;		return true;
}		}

bool LowerBitSets::eraseBitSetMetadata(Module &M) {		bool LowerBitSets::eraseBitSetMetadata() {
if (!BitSetNM)		if (!BitSetNM)
return false;		return false;

M.eraseNamedMetadata(BitSetNM);		M->eraseNamedMetadata(BitSetNM);
return true;		return true;
}		}

bool LowerBitSets::runOnModule(Module &M) {		bool LowerBitSets::runOnModule(Module &M) {
bool Changed = buildBitSets(M);		bool Changed = buildBitSets();
Changed |= eraseBitSetMetadata(M);		Changed |= eraseBitSetMetadata();
return Changed;		return Changed;
}		}

test/Transforms/LowerBitSets/simple.ll

; RUN: opt -S -lowerbitsets < %s | FileCheck %s		; RUN: opt -S -lowerbitsets < %s | FileCheck %s
; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s		; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s

target datalayout = "e-p:32:32"		target datalayout = "e-p:32:32"

; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }		; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
@a = constant i32 1		@a = constant i32 1
@b = constant [63 x i32] zeroinitializer		@b = constant [63 x i32] zeroinitializer
@c = constant i32 3		@c = constant i32 3
@d = constant [2 x i32] [i32 4, i32 5]		@d = constant [2 x i32] [i32 4, i32 5]

		; CHECK: [[BA:@[^ ]*]] = private constant [68 x i8] c"\03\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\01"

; Offset 0, 4 byte alignment		; Offset 0, 4 byte alignment
; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\08"
!0 = !{!"bitset1", i32* @a, i32 0}		!0 = !{!"bitset1", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0}		; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0}
!1 = !{!"bitset1", [63 x i32]* @b, i32 0}		!1 = !{!"bitset1", [63 x i32]* @b, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset1", [63 x i32]* @b, i32 0}		; CHECK-NODISCARD-DAG: !{!"bitset1", [63 x i32]* @b, i32 0}
!2 = !{!"bitset1", [2 x i32]* @d, i32 4}		!2 = !{!"bitset1", [2 x i32]* @d, i32 4}
; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4}		; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4}

; Offset 4, 256 byte alignment		; Offset 4, 256 byte alignment
; CHECK: @bitset2.bits = private constant [1 x i8] c"\03"
!3 = !{!"bitset2", [63 x i32]* @b, i32 0}		!3 = !{!"bitset2", [63 x i32]* @b, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", [63 x i32]* @b, i32 0}		; CHECK-NODISCARD-DAG: !{!"bitset2", [63 x i32]* @b, i32 0}
!4 = !{!"bitset2", i32* @c, i32 0}		!4 = !{!"bitset2", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}		; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}

; Offset 0, 4 byte alignment		; Offset 0, 4 byte alignment
; CHECK: @bitset3.bits = private constant [9 x i8] c"\01\00\00\00\00\00\00\00\02"
!5 = !{!"bitset3", i32* @a, i32 0}		!5 = !{!"bitset3", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}		; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
!6 = !{!"bitset3", i32* @c, i32 0}		!6 = !{!"bitset3", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @c, i32 0}		; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @c, i32 0}

; Entries whose second operand is null (the result of a global being DCE'd)		; Entries whose second operand is null (the result of a global being DCE'd)
; should be ignored.		; should be ignored.
!7 = !{!"bitset2", null, i32 0}		!7 = !{!"bitset2", null, i32 0}

!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }		!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }

; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)		; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)		; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)		; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)		; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)

		; CHECK: @bits = private alias getelementptr inbounds ([68 x i8]* [[BA]], i32 0, i32 0)
		; CHECK: @bits1 = private alias getelementptr inbounds ([68 x i8]* [[BA]], i32 0, i32 0)

declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone		declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone

; CHECK: @foo(i32* [[A0:%[^ ]*]])		; CHECK: @foo(i32* [[A0:%[^ ]*]])
define i1 @foo(i32* %p) {		define i1 @foo(i32* %p) {
; CHECK-NOT: llvm.bitset.test		; CHECK-NOT: llvm.bitset.test

; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*		; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
%pi8 = bitcast i32* %p to i8*		%pi8 = bitcast i32* %p to i8*
; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32		; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32
; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)		; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2		; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2
; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30		; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30
; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]		; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]
; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 68		; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 68
; CHECK: br i1 [[R6]]		; CHECK: br i1 [[R6]]

; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R5]], 5		; CHECK: [[R8:%[^ ]*]] = getelementptr i8, i8* @bits, i32 [[R5]]
; CHECK: [[R9:%[^ ]*]] = getelementptr i32, i32* bitcast ([9 x i8]* @bitset1.bits to i32*), i32 [[R8]]		; CHECK: [[R9:%[^ ]*]] = load i8, i8* [[R8]]
; CHECK: [[R10:%[^ ]*]] = load i32, i32* [[R9]]		; CHECK: [[R10:%[^ ]*]] = and i8 [[R9]], 1
; CHECK: [[R11:%[^ ]*]] = and i32 [[R5]], 31		; CHECK: [[R11:%[^ ]*]] = icmp ne i8 [[R10]], 0
; CHECK: [[R12:%[^ ]*]] = shl i32 1, [[R11]]
; CHECK: [[R13:%[^ ]*]] = and i32 [[R10]], [[R12]]
; CHECK: [[R14:%[^ ]*]] = icmp ne i32 [[R13]], 0

; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R14]], {{%[^ ]*}} ]		; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R11]], {{%[^ ]*}} ]
%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")		%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")

; CHECK-NOT: llvm.bitset.test		; CHECK-NOT: llvm.bitset.test
%y = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")		%y = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")

; CHECK: ret i1 [[R16]]		; CHECK: ret i1 [[R16]]
ret i1 %x		ret i1 %x
}		}
Show All 21 Lines	define i1 @baz(i32* %p) {
; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32		; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32
; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)		; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 2		; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 2
; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 30		; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 30
; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]		; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]
; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 66		; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 66
; CHECK: br i1 [[T6]]		; CHECK: br i1 [[T6]]

; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T5]], 5		; CHECK: [[T8:%[^ ]*]] = getelementptr i8, i8* @bits1, i32 [[T5]]
; CHECK: [[T9:%[^ ]*]] = getelementptr i32, i32* bitcast ([9 x i8]* @bitset3.bits to i32*), i32 [[T8]]		; CHECK: [[T9:%[^ ]*]] = load i8, i8* [[T8]]
; CHECK: [[T10:%[^ ]*]] = load i32, i32* [[T9]]		; CHECK: [[T10:%[^ ]*]] = and i8 [[T9]], 2
; CHECK: [[T11:%[^ ]*]] = and i32 [[T5]], 31		; CHECK: [[T11:%[^ ]*]] = icmp ne i8 [[T10]], 0
; CHECK: [[T12:%[^ ]*]] = shl i32 1, [[T11]]
; CHECK: [[T13:%[^ ]*]] = and i32 [[T10]], [[T12]]
; CHECK: [[T14:%[^ ]*]] = icmp ne i32 [[T13]], 0

; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T14]], {{%[^ ]*}} ]		; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ]
%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")		%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
; CHECK: ret i1 [[T16]]		; CHECK: ret i1 [[T16]]
ret i1 %x		ret i1 %x
}		}

; CHECK-NOT: !llvm.bitsets		; CHECK-NOT: !llvm.bitsets

unittests/Transforms/IPO/LowerBitSets.cpp

Show All 9 Lines
#include "llvm/Transforms/IPO/LowerBitSets.h"		#include "llvm/Transforms/IPO/LowerBitSets.h"
#include "gtest/gtest.h"		#include "gtest/gtest.h"

using namespace llvm;		using namespace llvm;

TEST(LowerBitSets, BitSetBuilder) {		TEST(LowerBitSets, BitSetBuilder) {
struct {		struct {
std::vector<uint64_t> Offsets;		std::vector<uint64_t> Offsets;
std::vector<uint8_t> Bits;		std::set<uint64_t> Bits;
uint64_t ByteOffset;		uint64_t ByteOffset;
uint64_t BitSize;		uint64_t BitSize;
unsigned AlignLog2;		unsigned AlignLog2;
bool IsSingleOffset;		bool IsSingleOffset;
bool IsAllOnes;		bool IsAllOnes;
} BSBTests[] = {		} BSBTests[] = {
{{}, {0}, 0, 1, 0, false, false},		{{}, {}, 0, 1, 0, false, false},
{{0}, {1}, 0, 1, 0, true, true},		{{0}, {0}, 0, 1, 0, true, true},
{{4}, {1}, 4, 1, 0, true, true},		{{4}, {0}, 4, 1, 0, true, true},
{{37}, {1}, 37, 1, 0, true, true},		{{37}, {0}, 37, 1, 0, true, true},
{{0, 1}, {3}, 0, 2, 0, false, true},		{{0, 1}, {0, 1}, 0, 2, 0, false, true},
{{0, 4}, {3}, 0, 2, 2, false, true},		{{0, 4}, {0, 1}, 0, 2, 2, false, true},
{{0, uint64_t(1) << 33}, {3}, 0, 2, 33, false, true},		{{0, uint64_t(1) << 33}, {0, 1}, 0, 2, 33, false, true},
{{3, 7}, {3}, 3, 2, 2, false, true},		{{3, 7}, {0, 1}, 3, 2, 2, false, true},
{{0, 1, 7}, {131}, 0, 8, 0, false, false},		{{0, 1, 7}, {0, 1, 7}, 0, 8, 0, false, false},
{{0, 2, 14}, {131}, 0, 8, 1, false, false},		{{0, 2, 14}, {0, 1, 7}, 0, 8, 1, false, false},
{{0, 1, 8}, {3, 1}, 0, 9, 0, false, false},		{{0, 1, 8}, {0, 1, 8}, 0, 9, 0, false, false},
{{0, 2, 16}, {3, 1}, 0, 9, 1, false, false},		{{0, 2, 16}, {0, 1, 8}, 0, 9, 1, false, false},
{{0, 1, 2, 3, 4, 5, 6, 7}, {255}, 0, 8, 0, false, true},		{{0, 1, 2, 3, 4, 5, 6, 7},
{{0, 1, 2, 3, 4, 5, 6, 7, 8}, {255, 1}, 0, 9, 0, false, true},		{0, 1, 2, 3, 4, 5, 6, 7},
		0,
		8,
		0,
		false,
		true},
		{{0, 1, 2, 3, 4, 5, 6, 7, 8},
		{0, 1, 2, 3, 4, 5, 6, 7, 8},
		0,
		9,
		0,
		false,
		true},
};		};

for (auto &&T : BSBTests) {		for (auto &&T : BSBTests) {
BitSetBuilder BSB;		BitSetBuilder BSB;
for (auto Offset : T.Offsets)		for (auto Offset : T.Offsets)
BSB.addOffset(Offset);		BSB.addOffset(Offset);

BitSetInfo BSI = BSB.build();		BitSetInfo BSI = BSB.build();
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	for (auto &&T : GLBTests) {

std::vector<uint64_t> ComputedLayout;		std::vector<uint64_t> ComputedLayout;
for (auto &&F : GLB.Fragments)		for (auto &&F : GLB.Fragments)
ComputedLayout.insert(ComputedLayout.end(), F.begin(), F.end());		ComputedLayout.insert(ComputedLayout.end(), F.begin(), F.end());

EXPECT_EQ(T.WantLayout, ComputedLayout);		EXPECT_EQ(T.WantLayout, ComputedLayout);
}		}
}		}

		TEST(LowerBitSets, ByteArrayBuilder) {
		struct BABAlloc {
		std::set<uint64_t> Bits;
		uint64_t BitSize;
		uint64_t WantByteOffset;
		uint8_t WantMask;
		};

		struct {
		std::vector<BABAlloc> Allocs;
		std::vector<uint8_t> WantBytes;
		} BABTests[] = {
		{{{{0}, 1, 0, 1}, {{0}, 1, 0, 2}}, {3}},
		{{{{0}, 16, 0, 1},
		{{1}, 15, 0, 2},
		{{2}, 14, 0, 4},
		{{3}, 13, 0, 8},
		{{4}, 12, 0, 0x10},
		{{5}, 11, 0, 0x20},
		{{6}, 10, 0, 0x40},
		{{7}, 9, 0, 0x80},
		{{0}, 7, 9, 0x80},
		{{0}, 6, 10, 0x40},
		{{0}, 5, 11, 0x20},
		{{0}, 4, 12, 0x10},
		{{0}, 3, 13, 8},
		{{0}, 2, 14, 4},
		{{0}, 1, 15, 2}},
		{1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 0, 0x80, 0x40, 0x20, 0x10, 8, 4,
		2}},
		};

		for (auto &&T : BABTests) {
		ByteArrayBuilder BABuilder;

		for (auto &&A : T.Allocs) {
		uint64_t GotByteOffset;
		uint8_t GotMask;

		BABuilder.allocate(A.Bits, A.BitSize, GotByteOffset, GotMask);
		EXPECT_EQ(A.WantByteOffset, GotByteOffset);
		EXPECT_EQ(A.WantMask, GotMask);
		}

		EXPECT_EQ(T.WantBytes, BABuilder.Bytes);
		}
		}

This is an archive of the discontinued LLVM Phabricator instance.

LowerBitSets: Use byte arrays instead of bit sets to represent in-memory bit sets.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 21059

include/llvm/Transforms/IPO/LowerBitSets.h

lib/Transforms/IPO/LowerBitSets.cpp

test/Transforms/LowerBitSets/simple.ll

unittests/Transforms/IPO/LowerBitSets.cpp

This is an archive of the discontinued LLVM Phabricator instance.

LowerBitSets: Use byte arrays instead of bit sets to represent in-memory bit sets.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 21059

include/llvm/Transforms/IPO/LowerBitSets.h

lib/Transforms/IPO/LowerBitSets.cpp

test/Transforms/LowerBitSets/simple.ll

unittests/Transforms/IPO/LowerBitSets.cpp

LowerBitSets: Use byte arrays instead of bit sets to represent in-memory bit sets.
ClosedPublic