Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -144,6 +144,7 @@ void initializeGlobalDCELegacyPassPass(PassRegistry&); void initializeGlobalMergePass(PassRegistry&); void initializeGlobalOptLegacyPassPass(PassRegistry&); +void initializeGlobalSplitPass(PassRegistry&); void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeGuardWideningLegacyPassPass(PassRegistry&); void initializeIPCPPass(PassRegistry&); Index: llvm/include/llvm/Transforms/IPO.h =================================================================== --- llvm/include/llvm/Transforms/IPO.h +++ llvm/include/llvm/Transforms/IPO.h @@ -221,6 +221,10 @@ /// metadata. ModulePass *createWholeProgramDevirtPass(); +/// This pass splits globals into pieces for the benefit of whole-program +/// devirtualization and control-flow integrity. +ModulePass *createGlobalSplitPass(); + //===----------------------------------------------------------------------===// // SampleProfilePass - Loads sample profile data from disk and generates // IR metadata to reflect the profile. Index: llvm/lib/Transforms/IPO/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/IPO/CMakeLists.txt +++ llvm/lib/Transforms/IPO/CMakeLists.txt @@ -12,6 +12,7 @@ FunctionImport.cpp GlobalDCE.cpp GlobalOpt.cpp + GlobalSplit.cpp IPConstantPropagation.cpp IPO.cpp InferFunctionAttrs.cpp Index: llvm/lib/Transforms/IPO/GlobalSplit.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/IPO/GlobalSplit.cpp @@ -0,0 +1,163 @@ +//===- GlobalSplit.cpp - global variable splitter -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass uses inbounds annotations on GEP indices to split globals where +// beneficial. Clang currently attaches these annotations to references to +// virtual table globals under the Itanium ABI for the benefit of the +// whole-program virtual call optimization and control flow integrity passes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" + +#include + +using namespace llvm; + +namespace { + +bool splitGlobal(GlobalVariable &GV) { + // If the address of the global is taken outside of the module, we cannot + // apply this transformation. + if (!GV.hasLocalLinkage()) + return false; + + // We currently only know how to split ConstantStructs. + auto *Init = dyn_cast_or_null(GV.getInitializer()); + if (!Init) + return false; + + // Verify that each user of the global is an inrange getelementptr constant. + // From this it follows that any loads from or stores to that global must use + // a pointer derived from an inrange getelementptr constant, which is + // sufficient to allow us to apply the splitting transform. + for (User *U : GV.users()) { + if (!isa(U)) + return false; + + auto *GEP = dyn_cast(U); + if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 || + !isa(GEP->getOperand(1)) || + !cast(GEP->getOperand(1))->isZero() || + !isa(GEP->getOperand(2))) + return false; + } + + SmallVector Types; + GV.getMetadata(LLVMContext::MD_type, Types); + + const DataLayout &DL = GV.getParent()->getDataLayout(); + const StructLayout *SL = DL.getStructLayout(Init->getType()); + + IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext()); + + std::vector SplitGlobals(Init->getNumOperands()); + for (unsigned I = 0; I != Init->getNumOperands(); ++I) { + // Build a global representing this split piece. + auto *SplitGV = + new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(), + GV.isConstant(), GlobalValue::PrivateLinkage, + Init->getOperand(I), GV.getName() + "." + utostr(I)); + SplitGlobals[I] = SplitGV; + + unsigned SplitBegin = SL->getElementOffset(I); + unsigned SplitEnd = (I == Init->getNumOperands() - 1) + ? SL->getSizeInBytes() + : SL->getElementOffset(I + 1); + + // Rebuild type metadata, adjusting by the split offset. + for (MDNode *Type : Types) { + uint64_t ByteOffset = cast( + cast(Type->getOperand(0))->getValue()) + ->getZExtValue(); + if (ByteOffset < SplitBegin || ByteOffset >= SplitEnd) + continue; + SplitGV->addMetadata( + LLVMContext::MD_type, + *MDNode::get(GV.getContext(), + {ConstantAsMetadata::get( + ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), + Type->getOperand(1)})); + } + } + + for (User *U : GV.users()) { + auto *GEP = cast(U); + unsigned I = cast(GEP->getOperand(2))->getZExtValue(); + if (I >= SplitGlobals.size()) + continue; + + std::vector Ops; + Ops.push_back(ConstantInt::get(Int32Ty, 0)); + for (unsigned I = 3; I != GEP->getNumOperands(); ++I) + Ops.push_back(GEP->getOperand(I)); + + auto *NewGEP = ConstantExpr::getGetElementPtr( + SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops, + GEP->isInBounds()); + GEP->replaceAllUsesWith(NewGEP); + } + + // Finally, remove the original global. Any remaining uses refer to invalid + // elements of the global, so replace with undef. + if (!GV.use_empty()) + GV.replaceAllUsesWith(UndefValue::get(GV.getType())); + GV.eraseFromParent(); + return true; +} + +bool splitGlobals(Module &M) { + // First, see if the module uses either of the llvm.type.test or + // llvm.type.checked.load intrinsics, which indicates that splitting globals + // may be beneficial. + Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + Function *TypeCheckedLoadFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); + if ((!TypeTestFunc || TypeTestFunc->use_empty()) && + (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) + return false; + + bool Changed = false; + for (auto I = M.global_begin(); I != M.global_end();) { + GlobalVariable &GV = *I; + ++I; + Changed |= splitGlobal(GV); + } + return Changed; +} + +struct GlobalSplit : public ModulePass { + static char ID; + GlobalSplit() : ModulePass(ID) { + initializeGlobalSplitPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) { + if (skipModule(M)) + return false; + + return splitGlobals(M); + } +}; + +} + +INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false) +char GlobalSplit::ID = 0; + +ModulePass *llvm::createGlobalSplitPass() { + return new GlobalSplit; +} Index: llvm/lib/Transforms/IPO/IPO.cpp =================================================================== --- llvm/lib/Transforms/IPO/IPO.cpp +++ llvm/lib/Transforms/IPO/IPO.cpp @@ -32,6 +32,7 @@ initializeForceFunctionAttrsLegacyPassPass(Registry); initializeGlobalDCELegacyPassPass(Registry); initializeGlobalOptLegacyPassPass(Registry); + initializeGlobalSplitPass(Registry); initializeIPCPPass(Registry); initializeAlwaysInlinerLegacyPassPass(Registry); initializeSimpleInlinerPass(Registry); Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -688,6 +688,11 @@ PM.add(createPostOrderFunctionAttrsLegacyPass()); PM.add(createReversePostOrderFunctionAttrsPass()); + // Split globals using inbounds annotations on GEP indices. This can help + // improve the quality of generated code when virtual constant propagation or + // control flow integrity are enabled. + PM.add(createGlobalSplitPass()); + // Apply whole-program devirtualization and virtual constant propagation. PM.add(createWholeProgramDevirtPass()); Index: llvm/test/Transforms/GlobalSplit/basic.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalSplit/basic.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -globalsplit %s | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: @vtt = constant [3 x i8*] [i8* bitcast ([2 x i8* ()*]* @global.0 to i8*), i8* bitcast (i8* ()** getelementptr inbounds ([2 x i8* ()*], [2 x i8* ()*]* @global.0, i32 0, i32 1) to i8*), i8* bitcast ([1 x i8* ()*]* @global.1 to i8*)] +@vtt = constant [3 x i8*] [ + i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 0, i32 0) to i8*), + i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 0, i32 1) to i8*), + i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 1, i32 0) to i8*) +] + +; CHECK-NOT: @global = +; CHECK: @global.0 = private constant [2 x i8* ()*] [i8* ()* @f1, i8* ()* @f2], !type [[T1:![0-9]+$]] +; CHECK: @global.1 = private constant [1 x i8* ()*] [i8* ()* @f3], !type [[T2:![0-9]+$]] +; CHECK-NOT: @global = +@global = internal constant { [2 x i8* ()*], [1 x i8* ()*] } { + [2 x i8* ()*] [i8* ()* @f1, i8* ()* @f2], + [1 x i8* ()*] [i8* ()* @f3] +}, !type !0, !type !1 + +; CHECK: define i8* @f1() +define i8* @f1() { + ; CHECK-NEXT: ret i8* bitcast ([2 x i8* ()*]* @global.0 to i8*) + ret i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 0, i32 0) to i8*) +} + +; CHECK: define i8* @f2() +define i8* @f2() { + ; CHECK-NEXT: ret i8* bitcast (i8* ()** getelementptr inbounds ([2 x i8* ()*], [2 x i8* ()*]* @global.0, i32 0, i32 1) to i8*) + ret i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 0, i32 1) to i8*) +} + +; CHECK: define i8* @f3() +define i8* @f3() { + ; CHECK-NEXT: ret i8* bitcast (i8* ()** getelementptr inbounds ([2 x i8* ()*], [2 x i8* ()*]* @global.0, i64 1, i32 0) to i8*) + ret i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 0, i32 2) to i8*) +} + +; CHECK: define i8* @f4() +define i8* @f4() { + ; CHECK-NEXT: ret i8* bitcast ([1 x i8* ()*]* @global.1 to i8*) + ret i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 1, i32 0) to i8*) +} + +define void @foo() { + %p = call i1 @llvm.type.test(i8* null, metadata !"") + ret void +} + +declare i1 @llvm.type.test(i8*, metadata) nounwind readnone + +; CHECK: [[T1]] = !{i32 8, !"foo"} +; CHECK: [[T2]] = !{i32 0, !"bar"} +!0 = !{i32 8, !"foo"} +!1 = !{i32 16, !"bar"} Index: llvm/test/Transforms/GlobalSplit/non-beneficial.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalSplit/non-beneficial.ll @@ -0,0 +1,24 @@ +; RUN: opt -S -globalsplit %s | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: @global = +@global = internal constant { [2 x i8* ()*], [1 x i8* ()*] } { + [2 x i8* ()*] [i8* ()* @f, i8* ()* @g], + [1 x i8* ()*] [i8* ()* @h] +} + +define i8* @f() { + ret i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 0, i32 0) to i8*) +} + +define i8* @g() { + ret i8* null +} + +define i8* @h() { + ret i8* null +} + +!0 = !{i32 16} Index: llvm/test/Transforms/GlobalSplit/nonlocal.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalSplit/nonlocal.ll @@ -0,0 +1,29 @@ +; RUN: opt -S -globalsplit %s | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: @global = +@global = constant { [2 x i8* ()*], [1 x i8* ()*] } { + [2 x i8* ()*] [i8* ()* @f, i8* ()* @g], + [1 x i8* ()*] [i8* ()* @h] +} + +define i8* @f() { + ret i8* bitcast (i8* ()** getelementptr ({ [2 x i8* ()*], [1 x i8* ()*] }, { [2 x i8* ()*], [1 x i8* ()*] }* @global, i32 0, inrange i32 0, i32 0) to i8*) +} + +define i8* @g() { + ret i8* null +} + +define i8* @h() { + ret i8* null +} + +define void @foo() { + %p = call i1 @llvm.type.test(i8* null, metadata !"") + ret void +} + +declare i1 @llvm.type.test(i8*, metadata) nounwind readnone