Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/IPO/GlobalSplit.cpp
- This file was added.
//===- GlobalSplit.cpp - global variable splitter -------------------------===// | |||||
// | |||||
// The LLVM Compiler Infrastructure | |||||
// | |||||
// This file is distributed under the University of Illinois Open Source | |||||
// License. See LICENSE.TXT for details. | |||||
// | |||||
//===----------------------------------------------------------------------===// | |||||
// | |||||
// This pass uses inbounds annotations on GEP indices to split globals where | |||||
// beneficial. Clang currently attaches these annotations to references to | |||||
// virtual table globals under the Itanium ABI for the benefit of the | |||||
// whole-program virtual call optimization and control flow integrity passes. | |||||
// | |||||
//===----------------------------------------------------------------------===// | |||||
#include "llvm/Transforms/IPO.h" | |||||
#include "llvm/ADT/StringExtras.h" | |||||
#include "llvm/IR/Constants.h" | |||||
#include "llvm/IR/GlobalVariable.h" | |||||
#include "llvm/IR/Intrinsics.h" | |||||
#include "llvm/IR/Module.h" | |||||
#include "llvm/IR/Operator.h" | |||||
#include "llvm/Pass.h" | |||||
#include <set> | |||||
using namespace llvm; | |||||
namespace { | |||||
bool splitGlobal(GlobalVariable &GV) { | |||||
// If the address of the global is taken outside of the module, we cannot | |||||
// apply this transformation. | |||||
if (!GV.hasLocalLinkage()) | |||||
return false; | |||||
// We currently only know how to split ConstantStructs. | |||||
auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer()); | |||||
if (!Init) | |||||
return false; | |||||
// Verify that each user of the global is an inrange getelementptr constant. | |||||
// From this it follows that any loads from or stores to that global must use | |||||
// a pointer derived from an inrange getelementptr constant, which is | |||||
// sufficient to allow us to apply the splitting transform. | |||||
for (User *U : GV.users()) { | |||||
if (!isa<Constant>(U)) | |||||
return false; | |||||
auto *GEP = dyn_cast<GEPOperator>(U); | |||||
if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 || | |||||
!isa<ConstantInt>(GEP->getOperand(1)) || | |||||
!cast<ConstantInt>(GEP->getOperand(1))->isZero() || | |||||
!isa<ConstantInt>(GEP->getOperand(2))) | |||||
return false; | |||||
} | |||||
SmallVector<MDNode *, 2> Types; | |||||
GV.getMetadata(LLVMContext::MD_type, Types); | |||||
const DataLayout &DL = GV.getParent()->getDataLayout(); | |||||
const StructLayout *SL = DL.getStructLayout(Init->getType()); | |||||
IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext()); | |||||
std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands()); | |||||
for (unsigned I = 0; I != Init->getNumOperands(); ++I) { | |||||
// Build a global representing this split piece. | |||||
auto *SplitGV = | |||||
new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(), | |||||
GV.isConstant(), GlobalValue::PrivateLinkage, | |||||
Init->getOperand(I), GV.getName() + "." + utostr(I)); | |||||
SplitGlobals[I] = SplitGV; | |||||
unsigned SplitBegin = SL->getElementOffset(I); | |||||
unsigned SplitEnd = (I == Init->getNumOperands() - 1) | |||||
? SL->getSizeInBytes() | |||||
: SL->getElementOffset(I + 1); | |||||
// Rebuild type metadata, adjusting by the split offset. | |||||
for (MDNode *Type : Types) { | |||||
eugenis: This is N^2.
N is probably very small in practice, but I guess there can be outliers. Consider… | |||||
pccAuthorUnsubmitted Not Done ReplyInline ActionsOn my machine this pass takes 0.4 seconds to run during an LTO link of chrome. So I don't think it's worth making major changes for performance here. pcc: On my machine this pass takes 0.4 seconds to run during an LTO link of chrome. So I don't think… | |||||
eugenisUnsubmitted Not Done ReplyInline ActionsOK, that's reasonable. eugenis: OK, that's reasonable. | |||||
uint64_t ByteOffset = cast<ConstantInt>( | |||||
cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) | |||||
->getZExtValue(); | |||||
if (ByteOffset < SplitBegin || ByteOffset >= SplitEnd) | |||||
continue; | |||||
SplitGV->addMetadata( | |||||
LLVMContext::MD_type, | |||||
*MDNode::get(GV.getContext(), | |||||
{ConstantAsMetadata::get( | |||||
ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), | |||||
Type->getOperand(1)})); | |||||
} | |||||
} | |||||
for (User *U : GV.users()) { | |||||
auto *GEP = cast<GEPOperator>(U); | |||||
unsigned I = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); | |||||
if (I >= SplitGlobals.size()) | |||||
continue; | |||||
std::vector<Value *> Ops; | |||||
eugenisUnsubmitted SmallVector? eugenis: SmallVector? | |||||
Ops.push_back(ConstantInt::get(Int32Ty, 0)); | |||||
for (unsigned I = 3; I != GEP->getNumOperands(); ++I) | |||||
Ops.push_back(GEP->getOperand(I)); | |||||
auto *NewGEP = ConstantExpr::getGetElementPtr( | |||||
SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops, | |||||
GEP->isInBounds()); | |||||
GEP->replaceAllUsesWith(NewGEP); | |||||
} | |||||
// Finally, remove the original global. Any remaining uses refer to invalid | |||||
// elements of the global, so replace with undef. | |||||
if (!GV.use_empty()) | |||||
GV.replaceAllUsesWith(UndefValue::get(GV.getType())); | |||||
GV.eraseFromParent(); | |||||
return true; | |||||
} | |||||
bool splitGlobals(Module &M) { | |||||
// First, see if the module uses either of the llvm.type.test or | |||||
// llvm.type.checked.load intrinsics, which indicates that splitting globals | |||||
// may be beneficial. | |||||
Function *TypeTestFunc = | |||||
M.getFunction(Intrinsic::getName(Intrinsic::type_test)); | |||||
Function *TypeCheckedLoadFunc = | |||||
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); | |||||
if ((!TypeTestFunc || TypeTestFunc->use_empty()) && | |||||
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) | |||||
return false; | |||||
bool Changed = false; | |||||
for (auto I = M.global_begin(); I != M.global_end();) { | |||||
GlobalVariable &GV = *I; | |||||
++I; | |||||
Changed |= splitGlobal(GV); | |||||
} | |||||
return Changed; | |||||
} | |||||
struct GlobalSplit : public ModulePass { | |||||
static char ID; | |||||
GlobalSplit() : ModulePass(ID) { | |||||
initializeGlobalSplitPass(*PassRegistry::getPassRegistry()); | |||||
} | |||||
bool runOnModule(Module &M) { | |||||
if (skipModule(M)) | |||||
return false; | |||||
return splitGlobals(M); | |||||
} | |||||
}; | |||||
} | |||||
INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false) | |||||
char GlobalSplit::ID = 0; | |||||
ModulePass *llvm::createGlobalSplitPass() { | |||||
return new GlobalSplit; | |||||
} |
This is N^2.
N is probably very small in practice, but I guess there can be outliers. Consider switching to an NlogN implementation?