diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
--- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -48,6 +48,29 @@
 Instruction *promoteCallWithIfThenElse(CallSite CS, Function *Callee,
                                        MDNode *BranchWeights = nullptr);
 
+/// Try to promote (devirtualize) a virtual call on an Alloca. Return true on
+/// success.
+///
+/// Look for a pattern like:
+///
+///  %o = alloca %class.Impl
+///  %1 = getelementptr %class.Impl, %class.Impl* %o, i64 0, i32 0, i32 0
+///  store i32 (...)** bitcast (i8** getelementptr inbounds
+///      ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV4Impl, i64 0, inrange i32 0, i64 2)
+///      to i32 (...)**), i32 (...)*** %1
+///  %2 = getelementptr inbounds %class.Impl, %class.Impl* %o, i64 0, i32 0
+///  %3 = bitcast %class.Interface* %2 to void (%class.Interface*)***
+///  %vtable.i = load void (%class.Interface*)**, void (%class.Interface*)*** %3
+///  %4 = load void (%class.Interface*)*, void (%class.Interface*)** %vtable.i
+///  call void %4(%class.Interface* nonnull %2)
+///
+///  @_ZTV4Impl = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] }
+///      { [3 x i8*]
+///        [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI4Impl to i8*),
+///         i8* bitcast (void (%class.Impl*)* @_ZN4Impl3RunEv to i8*)] }
+///
+bool tryPromoteCall(CallSite &CS);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -452,6 +452,26 @@
         StrippedPtr != StorePtr)
       continue;
 
+    // If the load and the store have the same base and constant offsets, and
+    // the access ranges don't overlap, ignore the store. This is a simple
+    // form of alias analysis.
+    APInt LoadOffset(DL.getTypeSizeInBits(Ptr->getType()), 0);
+    APInt StoreOffset(DL.getTypeSizeInBits(SI->getPointerOperand()->getType()),
+                      0);
+    Value *LoadBase = Ptr->stripAndAccumulateConstantOffsets(
+        DL, LoadOffset, /* AllowNonInbounds */ false);
+    Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets(
+        DL, StoreOffset, /* AllowNonInbounds */ false);
+    auto StoreAccessSize = LocationSize::precise(
+        DL.getTypeStoreSize(SI->getValueOperand()->getType()));
+    ConstantRange LoadRange(LoadOffset, LoadOffset + AccessSize.toRaw());
+    ConstantRange StoreRange(StoreOffset,
+                             StoreOffset + StoreAccessSize.toRaw());
+    if (LoadBase == StoreBase &&
+        LoadRange.intersectWith(StoreRange).isEmptySet())
+      continue;
+
     // If we have alias analysis and it says the store won't modify the loaded
     // value, ignore the store.
     if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize)))
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
@@ -1096,10 +1097,20 @@
       if (!IFI.InlinedCallSites.empty()) {
         int NewHistoryID = InlineHistory.size();
         InlineHistory.push_back({&Callee, InlineHistoryID});
-        for (CallSite &CS : reverse(IFI.InlinedCallSites))
-          if (Function *NewCallee = CS.getCalledFunction())
+        for (CallSite &CS : reverse(IFI.InlinedCallSites)) {
+          Function *NewCallee = CS.getCalledFunction();
+          if (!NewCallee) {
+            // Try to promote an indirect (virtual) call without waiting for
+            // the post-inline cleanup and the next DevirtSCCRepeatedPass
+            // iteration because the next iteration may not happen and we may
+            // miss inlining it.
+            if (tryPromoteCall(CS))
+              NewCallee = CS.getCalledFunction();
+          }
+          if (NewCallee)
             if (!NewCallee->isDeclaration())
               Calls.push_back({CS, NewHistoryID});
+        }
       }
 
       if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -12,6 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
@@ -458,4 +460,60 @@
   return promoteCall(CallSite(NewInst), Callee);
 }
 
+bool llvm::tryPromoteCall(CallSite &CS) {
+  assert(!CS.getCalledFunction());
+  Module *M = CS.getCaller()->getParent();
+  const DataLayout &DL = M->getDataLayout();
+  Value *Callee = CS.getCalledValue();
+
+  LoadInst *VTableEntryLoad = dyn_cast<LoadInst>(Callee);
+  if (!VTableEntryLoad)
+    return false; // Not a vtable entry load.
+  Value *VTableEntryPtr = VTableEntryLoad->getPointerOperand();
+  APInt VTableOffset(DL.getTypeSizeInBits(VTableEntryPtr->getType()), 0);
+  Value *VTableBasePtr = VTableEntryPtr->stripAndAccumulateConstantOffsets(
+      DL, VTableOffset, /* AllowNonInbounds */ false);
+  LoadInst *VTableLoad = dyn_cast<LoadInst>(VTableBasePtr);
+  if (!VTableLoad)
+    return false; // Not a vtable load.
+  Value *Object = VTableLoad->getPointerOperand();
+  APInt ObjectOffset(DL.getTypeSizeInBits(Object->getType()), 0);
+  Value *ObjectBase = Object->stripAndAccumulateConstantOffsets(
+      DL, ObjectOffset, /* AllowNonInbounds */ false);
+  if (!(isa<AllocaInst>(ObjectBase) && ObjectOffset == 0))
+    // Not an Alloca or the offset isn't zero.
+    return false;
+
+  // Look for the vtable pointer store into the object by the ctor.
+  BasicBlock::iterator BBI(VTableLoad);
+  Value *VTable = FindAvailableLoadedValue(
+      VTableLoad, VTableLoad->getParent(), BBI, 0, nullptr, nullptr);
+  if (!VTable)
+    return false; // No vtable found.
+  APInt VTableOffsetGVBase(DL.getTypeSizeInBits(VTable->getType()), 0);
+  Value *VTableGVBase = VTable->stripAndAccumulateConstantOffsets(
+      DL, VTableOffsetGVBase, /* AllowNonInbounds */ false);
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(VTableGVBase);
+  if (!(GV && GV->isConstant() && GV->hasDefinitiveInitializer()))
+    // Not in the form of a global constant variable with an initializer.
+    return false;
+
+  Constant *VTableGVInitializer = GV->getInitializer();
+  APInt VTableGVOffset = VTableOffsetGVBase + VTableOffset;
+  if (!(VTableGVOffset.getActiveBits() <= 64))
+    return false; // Out of range.
+  Constant *Ptr = getPointerAtOffset(VTableGVInitializer,
+                                     VTableGVOffset.getZExtValue(), *M);
+  if (!Ptr)
+    return false; // No constant (function) pointer found.
+  Function *DirectCallee = dyn_cast<Function>(Ptr->stripPointerCasts());
+  if (!DirectCallee)
+    return false; // No function pointer found.
+
+  // Success.
+  promoteCall(CS, DirectCallee);
+  return true;
+}
+
 #undef DEBUG_TYPE
diff --git a/llvm/test/Transforms/Inline/devirtualize-4.ll b/llvm/test/Transforms/Inline/devirtualize-4.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/devirtualize-4.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s -passes='cgscc(devirt<4>(inline))' -S | FileCheck %s
+; RUN: opt < %s -passes='default<O3>' -S | FileCheck %s
+
+; Check that DoNotOptimize is inlined into Test.
+; CHECK: @_Z4Testv()
+; CHECK-NOT: ret void
+; CHECK: call void asm
+; CHECK: ret void
+
+;template <class T>
+;void DoNotOptimize(const T& var) {
+;  asm volatile("" : "+m"(const_cast<T&>(var)));
+;}
+;
+;class Interface {
+; public:
+;  virtual void Run() = 0;
+;};
+;
+;class Impl : public Interface {
+; public:
+;  Impl() : f(3) {}
+;  void Run() { DoNotOptimize(this); }
+;
+; private:
+;  int f;
+;};
+;
+;static void IndirectRun(Interface& o) { o.Run(); }
+;
+;void Test() {
+;  Impl o;
+;  IndirectRun(o);
+;}
+
+%class.Impl = type <{ %class.Interface, i32, [4 x i8] }>
+%class.Interface = type { i32 (...)** }
+
+@_ZTV4Impl = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI4Impl to i8*), i8* bitcast (void (%class.Impl*)* @_ZN4Impl3RunEv to i8*)] }, align 8
+@_ZTVN10__cxxabiv120__si_class_type_infoE = external dso_local global i8*
+@_ZTS4Impl = linkonce_odr dso_local constant [6 x i8] c"4Impl\00", align 1
+@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8*
+@_ZTS9Interface = linkonce_odr dso_local constant [11 x i8] c"9Interface\00", align 1
+@_ZTI9Interface = linkonce_odr dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9Interface, i32 0, i32 0) }, align 8
+@_ZTI4Impl = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @_ZTS4Impl, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI9Interface to i8*) }, align 8
+@_ZTV9Interface = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI9Interface to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)] }, align 8
+
+define dso_local void @_Z4Testv() local_unnamed_addr {
+entry:
+  %o = alloca %class.Impl, align 8
+  %0 = bitcast %class.Impl* %o to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
+  call void @_ZN4ImplC2Ev(%class.Impl* nonnull %o)
+  %1 = getelementptr inbounds %class.Impl, %class.Impl* %o, i64 0, i32 0
+  call fastcc void @_ZL11IndirectRunR9Interface(%class.Interface* nonnull dereferenceable(8) %1)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+define linkonce_odr dso_local void @_ZN4ImplC2Ev(%class.Impl* %this) unnamed_addr align 2 {
+entry:
+  %0 = getelementptr %class.Impl, %class.Impl* %this, i64 0, i32 0
+  call void @_ZN9InterfaceC2Ev(%class.Interface* %0)
+  %1 = getelementptr %class.Impl, %class.Impl* %this, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV4Impl, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8
+  %f = getelementptr inbounds %class.Impl, %class.Impl* %this, i64 0, i32 1
+  store i32 3, i32* %f, align 8
+  ret void
+}
+
+define internal fastcc void @_ZL11IndirectRunR9Interface(%class.Interface* dereferenceable(8) %o) unnamed_addr {
+entry:
+  %0 = bitcast %class.Interface* %o to void (%class.Interface*)***
+  %vtable = load void (%class.Interface*)**, void (%class.Interface*)*** %0, align 8
+  %1 = load void (%class.Interface*)*, void (%class.Interface*)** %vtable, align 8
+  call void %1(%class.Interface* nonnull %o)
+  ret void
+}
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+define linkonce_odr dso_local void @_ZN9InterfaceC2Ev(%class.Interface* %this) unnamed_addr align 2 {
+entry:
+  %0 = getelementptr %class.Interface, %class.Interface* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV9Interface, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+}
+
+define linkonce_odr dso_local void @_ZN4Impl3RunEv(%class.Impl* %this) unnamed_addr align 2 {
+entry:
+  %ref.tmp = alloca %class.Impl*, align 8
+  %0 = bitcast %class.Impl** %ref.tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  store %class.Impl* %this, %class.Impl** %ref.tmp, align 8
+  call void @_Z13DoNotOptimizeIP4ImplEvRKT_(%class.Impl** nonnull dereferenceable(8) %ref.tmp)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret void
+}
+
+declare dso_local void @__cxa_pure_virtual() unnamed_addr
+
+define linkonce_odr dso_local void @_Z13DoNotOptimizeIP4ImplEvRKT_(%class.Impl** dereferenceable(8) %var) local_unnamed_addr {
+entry:
+  call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(%class.Impl** nonnull %var, %class.Impl** nonnull %var)
+  ret void
+}
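
Reviewer note: the Loads.cpp hunk above hinges on one idea: after stripping constant GEP offsets, two accesses off the same base cannot alias when their half-open byte ranges [Offset, Offset + Size) are disjoint. The following is a minimal standalone sketch of that check in plain C++, deliberately avoiding the LLVM APIs (Access and provablyNoAlias are illustrative names, not anything in LLVM):

#include <cassert>
#include <cstdint>

struct Access {
  const void *Base; // pointer left after stripping constant GEP offsets
  uint64_t Offset;  // accumulated constant byte offset from Base
  uint64_t Size;    // access size in bytes
};

// Mirrors LoadRange.intersectWith(StoreRange).isEmptySet() from the patch:
// with a common base, the accesses are provably disjoint exactly when one
// range ends at or before the other begins.
static bool provablyNoAlias(const Access &Load, const Access &Store) {
  if (Load.Base != Store.Base)
    return false; // Different or unknown bases: cannot conclude anything.
  return Load.Offset + Load.Size <= Store.Offset ||
         Store.Offset + Store.Size <= Load.Offset;
}

int main() {
  char Object[16];
  // Vtable pointer load at offset 0 vs. the ctor's store of `f` at offset 8:
  // disjoint, so a FindAvailableLoadedValue-style scan may skip the store.
  assert(provablyNoAlias({Object, 0, 8}, {Object, 8, 4}));
  // Overlapping ranges: the store must be treated as a clobber.
  assert(!provablyNoAlias({Object, 4, 8}, {Object, 8, 4}));
}

This is why the test's `store i32 3, i32* %f` no longer blocks the search for the vtable pointer store in @_ZN4ImplC2Ev.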
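
Likewise, the core of tryPromoteCall can be modeled without LLVM machinery: once the store of a known constant vtable into the alloca is found, the indirect callee is just the initializer entry at the accumulated byte offset. A hedged sketch under that assumption follows (ImplRun, VTableInitializer, and calleeAtOffset are made up for illustration; the real code uses getPointerAtOffset on the global's initializer):

#include <cassert>
#include <cstddef>
#include <cstdio>

using Fn = void (*)();
static void ImplRun() { std::puts("Impl::Run"); }

// Models @_ZTV4Impl: offset-to-top and RTTI slots, then the first virtual
// function slot at the address point.
static const Fn VTableInitializer[3] = {nullptr, nullptr, &ImplRun};

// Models getPointerAtOffset: index the constant initializer by the byte
// offset accumulated while stripping the load/GEP chain.
static Fn calleeAtOffset(const Fn *Initializer, size_t ByteOffset) {
  return Initializer[ByteOffset / sizeof(Fn)];
}

int main() {
  // The ctor stores a pointer to slot 2 (the address point, byte offset 16
  // on LP64); the virtual call loads slot 0 relative to that point.
  size_t AddressPointOffset = 2 * sizeof(Fn);
  size_t SlotOffset = 0;
  Fn Direct = calleeAtOffset(VTableInitializer, AddressPointOffset + SlotOffset);
  assert(Direct == &ImplRun); // The indirect call folds to a direct call.
  Direct();
}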