WholeProgramDevirt: more informative optimization remarks.

Summary of what this patch does (text before the first "Index:" line is
ignored by patch(1)):
  * Call-site remarks now read "<opt-name>: devirtualized a call to <target>"
    instead of the bare "devirtualized call".
  * Each target records in WasDevirt whether any call site to it was
    devirtualized. The per-target "devirtualized <name>" remarks are collected
    in a map keyed by function name and emitted once after all call slots
    have been processed, replacing the per-slot emitTargetsRemarks() helper.
  * All remark work is guarded by RemarksEnabled, which the DevirtModule
    constructor computes once via areRemarksEnabled() (it probes the first
    function in the module and returns false for a module with no functions).
  * WasDevirt carries a default member initializer: no hunk in this patch sets
    it in a constructor, it is only ever assigned true, and the loop in run()
    reads it for every target.

NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
extraction that had also dropped all "<...>" text. Template arguments and the
"<unknown>" remark locations below were restored, and indentation was
reconstructed in LLVM style -- confirm both against the tree before applying.

Index: include/llvm/Transforms/IPO/WholeProgramDevirt.h
===================================================================
--- include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -134,6 +134,9 @@
   // Whether the target is big endian.
   bool IsBigEndian;
 
+  // Whether at least one call site to the target was devirtualized.
+  bool WasDevirt = false;
+
   // The minimum byte offset before the address point. This covers the bytes in
   // the vtable object before the address point (e.g. RTTI, access-to-top,
   // vtables for other base classes) and is equal to the offset from the start
Index: lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -217,15 +217,18 @@
   // of that field for details.
   unsigned *NumUnsafeUses;
 
-  void emitRemark() {
+  void emitRemark(const Twine &OptName, const Twine &TargetName) {
     Function *F = CS.getCaller();
-    emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F,
-                           CS.getInstruction()->getDebugLoc(),
-                           "devirtualized call");
+    emitOptimizationRemark(
+        F->getContext(), DEBUG_TYPE, *F,
+        CS.getInstruction()->getDebugLoc(),
+        OptName + ": devirtualized a call to " + TargetName);
   }
 
-  void replaceAndErase(Value *New) {
-    emitRemark();
+  void replaceAndErase(const Twine &OptName, const Twine &TargetName,
+                       bool RemarksEnabled, Value *New) {
+    if (RemarksEnabled)
+      emitRemark(OptName, TargetName);
     CS->replaceAllUsesWith(New);
     if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) {
       BranchInst::Create(II->getNormalDest(), CS.getInstruction());
@@ -244,6 +247,8 @@
   PointerType *Int8PtrTy;
   IntegerType *Int32Ty;
 
+  bool RemarksEnabled;
+
   MapVector<VTableSlot, std::vector<VirtualCallSite>> CallSlots;
 
   // This map keeps track of the number of "unsafe" uses of a loaded function
@@ -259,7 +264,10 @@
   DevirtModule(Module &M)
       : M(M), Int8Ty(Type::getInt8Ty(M.getContext())),
         Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
-        Int32Ty(Type::getInt32Ty(M.getContext())) {}
+        Int32Ty(Type::getInt32Ty(M.getContext())),
+        RemarksEnabled(areRemarksEnabled()) {}
+
+  bool areRemarksEnabled();
 
   void scanTypeTestUsers(Function *TypeTestFunc, Function *AssumeFunc);
   void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
@@ -271,16 +279,16 @@
   tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
                             const std::set<TypeMemberInfo> &TypeMemberInfos,
                             uint64_t ByteOffset);
-  bool trySingleImplDevirt(ArrayRef<VirtualCallTarget> TargetsForSlot,
+  bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            MutableArrayRef<VirtualCallSite> CallSites);
   bool tryEvaluateFunctionsWithArgs(
       MutableArrayRef<VirtualCallTarget> TargetsForSlot,
       ArrayRef<ConstantInt *> Args);
   bool tryUniformRetValOpt(IntegerType *RetType,
-                           ArrayRef<VirtualCallTarget> TargetsForSlot,
+                           MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            MutableArrayRef<VirtualCallSite> CallSites);
   bool tryUniqueRetValOpt(unsigned BitWidth,
-                          ArrayRef<VirtualCallTarget> TargetsForSlot,
+                          MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           MutableArrayRef<VirtualCallSite> CallSites);
   bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            ArrayRef<VirtualCallSite> CallSites);
@@ -393,7 +401,7 @@
 }
 
 bool DevirtModule::trySingleImplDevirt(
-    ArrayRef<VirtualCallTarget> TargetsForSlot,
+    MutableArrayRef<VirtualCallTarget> TargetsForSlot,
     MutableArrayRef<VirtualCallSite> CallSites) {
   // See if the program contains a single implementation of this virtual
   // function.
@@ -402,9 +410,12 @@
     if (TheFn != Target.Fn)
       return false;
 
+  if (RemarksEnabled)
+    TargetsForSlot[0].WasDevirt = true;
   // If so, update each call site to call that implementation directly.
   for (auto &&VCallSite : CallSites) {
-    VCallSite.emitRemark();
+    if (RemarksEnabled)
+      VCallSite.emitRemark("single-impl", TheFn->getName());
     VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
         TheFn, VCallSite.CS.getCalledValue()->getType()));
     // This use is no longer unsafe.
@@ -442,7 +453,7 @@
 }
 
 bool DevirtModule::tryUniformRetValOpt(
-    IntegerType *RetType, ArrayRef<VirtualCallTarget> TargetsForSlot,
+    IntegerType *RetType, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
     MutableArrayRef<VirtualCallSite> CallSites) {
   // Uniform return value optimization. If all functions return the same
   // constant, replace all calls with that constant.
@@ -453,12 +464,16 @@
 
   auto TheRetValConst = ConstantInt::get(RetType, TheRetVal);
   for (auto Call : CallSites)
-    Call.replaceAndErase(TheRetValConst);
+    Call.replaceAndErase("uniform-ret-val", TargetsForSlot[0].Fn->getName(),
+                         RemarksEnabled, TheRetValConst);
+  if (RemarksEnabled)
+    for (auto &&Target : TargetsForSlot)
+      Target.WasDevirt = true;
   return true;
 }
 
 bool DevirtModule::tryUniqueRetValOpt(
-    unsigned BitWidth, ArrayRef<VirtualCallTarget> TargetsForSlot,
+    unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
     MutableArrayRef<VirtualCallSite> CallSites) {
   // IsOne controls whether we look for a 0 or a 1.
   auto tryUniqueRetValOptFor = [&](bool IsOne) {
@@ -482,8 +497,14 @@
       OneAddr = B.CreateConstGEP1_64(OneAddr, UniqueMember->Offset);
       Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
                                 Call.VTable, OneAddr);
-      Call.replaceAndErase(Cmp);
+      Call.replaceAndErase("unique-ret-val", TargetsForSlot[0].Fn->getName(),
+                           RemarksEnabled, Cmp);
     }
+    // Update devirtualization statistics for targets.
+    if (RemarksEnabled)
+      for (auto &&Target : TargetsForSlot)
+        Target.WasDevirt = true;
+
     return true;
   };
 
@@ -591,6 +612,10 @@
       setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
                            OffsetBit);
 
+    if (RemarksEnabled)
+      for (auto &&Target : TargetsForSlot)
+        Target.WasDevirt = true;
+
     // Rewrite each call to a load from OffsetByte/OffsetBit.
     for (auto Call : CSByConstantArg.second) {
       IRBuilder<> B(Call.CS.getInstruction());
@@ -600,27 +625,21 @@
         Value *Bit = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
         Value *BitsAndBit = B.CreateAnd(Bits, Bit);
         auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
-        Call.replaceAndErase(IsBitSet);
+        Call.replaceAndErase("virtual-const-prop-1-bit",
+                             TargetsForSlot[0].Fn->getName(),
+                             RemarksEnabled, IsBitSet);
       } else {
         Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
         Value *Val = B.CreateLoad(RetType, ValAddr);
-        Call.replaceAndErase(Val);
+        Call.replaceAndErase("virtual-const-prop",
+                             TargetsForSlot[0].Fn->getName(),
+                             RemarksEnabled, Val);
       }
     }
   }
   return true;
 }
 
-static void emitTargetsRemarks(const std::vector<VirtualCallTarget> &TargetsForSlot) {
-  for (const VirtualCallTarget &Target : TargetsForSlot) {
-    Function *F = Target.Fn;
-    DISubprogram *SP = F->getSubprogram();
-    DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
-    emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
-                           std::string("devirtualized ") + F->getName().str());
-  }
-}
-
 void DevirtModule::rebuildGlobal(VTableBits &B) {
   if (B.Before.Bytes.empty() && B.After.Bytes.empty())
     return;
@@ -666,6 +685,15 @@
   B.GV->eraseFromParent();
 }
 
+bool DevirtModule::areRemarksEnabled() {
+  const auto &FL = M.getFunctionList();
+  if (FL.empty())
+    return false;
+  const Function &Fn = FL.front();
+  auto DI = DiagnosticInfoOptimizationRemark(DEBUG_TYPE, Fn, DebugLoc(), "");
+  return DI.isEnabled();
+}
+
 void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
                                      Function *AssumeFunc) {
   // Find all virtual calls via a virtual table pointer %p under an assumption
@@ -817,6 +845,7 @@
 
   // For each (type, offset) pair:
   bool DidVirtualConstProp = false;
+  std::map<std::string, Function *> DevirtTargets;
   for (auto &S : CallSlots) {
     // Search each of the members of the type identifier for the virtual
     // function implementation at offset S.first.ByteOffset, and add to
@@ -826,14 +855,25 @@
                                    S.first.ByteOffset))
       continue;
 
-    if (trySingleImplDevirt(TargetsForSlot, S.second)) {
-      emitTargetsRemarks(TargetsForSlot);
-      continue;
-    }
-
-    if (tryVirtualConstProp(TargetsForSlot, S.second)) {
-      emitTargetsRemarks(TargetsForSlot);
-      DidVirtualConstProp = true;
+    if (!trySingleImplDevirt(TargetsForSlot, S.second) &&
+        tryVirtualConstProp(TargetsForSlot, S.second))
+      DidVirtualConstProp = true;
+
+    // Collect functions devirtualized at least for one call site for stats.
+    if (RemarksEnabled)
+      for (const auto &T : TargetsForSlot)
+        if (T.WasDevirt)
+          DevirtTargets[T.Fn->getName()] = T.Fn;
+  }
+
+  if (RemarksEnabled) {
+    // Generate remarks for each devirtualized function.
+    for (const auto &DT : DevirtTargets) {
+      Function *F = DT.second;
+      DISubprogram *SP = F->getSubprogram();
+      DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
+      emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
+                             Twine("devirtualized ") + F->getName());
     }
   }
 
Index: test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll
===================================================================
--- test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll
+++ test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll
@@ -3,8 +3,9 @@
 target datalayout = "e-p:64:64"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: remark: <unknown>:0:0: devirtualized call
-; CHECK-NOT: devirtualized call
+; CHECK: remark: <unknown>:0:0: single-impl: devirtualized a call to vf
+; CHECK: remark: <unknown>:0:0: devirtualized vf
+; CHECK-NOT: devirtualized
 
 @vt1 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
 @vt2 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
Index: test/Transforms/WholeProgramDevirt/devirt-single-impl.ll
===================================================================
--- test/Transforms/WholeProgramDevirt/devirt-single-impl.ll
+++ test/Transforms/WholeProgramDevirt/devirt-single-impl.ll
@@ -3,19 +3,19 @@
 target datalayout = "e-p:64:64"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: remark: <unknown>:0:0: devirtualized call
-; CHECK: remark: <unknown>:0:0: devirtualized vf
-; CHECK: remark: <unknown>:0:0: devirtualized vf
+; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:13:0: devirtualized vf
+; CHECK-NOT: devirtualized
 
-@vt1 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
-@vt2 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
+@vt1 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !8
+@vt2 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !8
 
-define void @vf(i8* %this) {
+define void @vf(i8* %this) #0 !dbg !7 {
   ret void
 }
 
 ; CHECK: define void @call
-define void @call(i8* %obj) {
+define void @call(i8* %obj) #1 !dbg !5 {
   %vtableptr = bitcast i8* %obj to [1 x i8*]**
   %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
   %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
@@ -25,11 +25,23 @@
   %fptr = load i8*, i8** %fptrptr
   %fptr_casted = bitcast i8* %fptr to void (i8*)*
   ; CHECK: call void @vf(
-  call void %fptr_casted(i8* %obj)
+  call void %fptr_casted(i8* %obj), !dbg !6
   ret void
 }
 
 declare i1 @llvm.type.test(i8*, metadata)
 declare void @llvm.assume(i1)
 
-!0 = !{i32 0, !"typeid"}
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "devirt-single.cc", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 4.0.0 (trunk 278098)"}
+!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!6 = !DILocation(line: 30, column: 32, scope: !5)
+!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!8 = !{i32 0, !"typeid"}
Index: test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
===================================================================
--- test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
+++ test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
@@ -3,22 +3,16 @@
 target datalayout = "e-p:64:64"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: remark: <unknown>:0:0: devirtualized call
+; CHECK: remark: <unknown>:0:0: virtual-const-prop: devirtualized a call to vf1i32
+; CHECK: remark: <unknown>:0:0: virtual-const-prop-1-bit: devirtualized a call to vf1i1
+; CHECK: remark: <unknown>:0:0: virtual-const-prop-1-bit: devirtualized a call to vf0i1
+; CHECK: remark: <unknown>:0:0: devirtualized vf0i1
+; CHECK: remark: <unknown>:0:0: devirtualized vf1i1
 ; CHECK: remark: <unknown>:0:0: devirtualized vf1i32
 ; CHECK: remark: <unknown>:0:0: devirtualized vf2i32
 ; CHECK: remark: <unknown>:0:0: devirtualized vf3i32
 ; CHECK: remark: <unknown>:0:0: devirtualized vf4i32
-; CHECK: remark: <unknown>:0:0: devirtualized call
-; CHECK: remark: <unknown>:0:0: devirtualized vf1i1
-; CHECK: remark: <unknown>:0:0: devirtualized vf0i1
-; CHECK: remark: <unknown>:0:0: devirtualized vf1i1
-; CHECK: remark: <unknown>:0:0: devirtualized vf0i1
-; CHECK: remark: <unknown>:0:0: devirtualized call
-; CHECK: remark: <unknown>:0:0: devirtualized vf0i1
-; CHECK: remark: <unknown>:0:0: devirtualized vf1i1
-; CHECK: remark: <unknown>:0:0: devirtualized vf0i1
-; CHECK: remark: <unknown>:0:0: devirtualized vf1i1
-; CHECK-NOT: devirtualized call
+; CHECK-NOT: devirtualized
 
 ; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\01\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
 @vt1 = constant [3 x i8*] [