Index: compiler-rt/trunk/test/cfi/simple-pass.cpp
===================================================================
--- compiler-rt/trunk/test/cfi/simple-pass.cpp
+++ compiler-rt/trunk/test/cfi/simple-pass.cpp
@@ -1,5 +1,7 @@
 // RUN: %clangxx_cfi -o %t %s
 // RUN: %run %t
+// RUN: %clangxx_cfi -mretpoline -o %t2 %s
+// RUN: %run %t2
 
 // Tests that the CFI mechanism does not crash the program when making various
 // kinds of valid calls involving classes with various different linkages and
Index: llvm/trunk/include/llvm/ADT/PointerUnion.h
===================================================================
--- llvm/trunk/include/llvm/ADT/PointerUnion.h
+++ llvm/trunk/include/llvm/ADT/PointerUnion.h
@@ -346,6 +346,12 @@
   };
 };
 
+template <typename PT1, typename PT2, typename PT3>
+bool operator<(PointerUnion3<PT1, PT2, PT3> lhs,
+               PointerUnion3<PT1, PT2, PT3> rhs) {
+  return lhs.getOpaqueValue() < rhs.getOpaqueValue();
+}
+
 /// A pointer union of four pointer types. See documentation for PointerUnion
 /// for usage.
 template <typename PT1, typename PT2, typename PT3, typename PT4>
Index: llvm/trunk/include/llvm/CodeGen/TargetOpcodes.def
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetOpcodes.def
+++ llvm/trunk/include/llvm/CodeGen/TargetOpcodes.def
@@ -187,6 +187,8 @@
 /// patched to insert instrumentation instructions.
 HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL)
 
+HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
+
 /// The following generic opcodes are not supposed to appear after ISel.
 /// This is something we might want to relax, but for now, this is convenient
 /// to produce diagnostics.
Index: llvm/trunk/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td
+++ llvm/trunk/include/llvm/IR/Intrinsics.td
@@ -874,6 +874,10 @@
                     [llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],
                     [IntrNoMem]>;
 
+// Create a branch funnel that implements an indirect call to a limited set of
+// callees. This needs to be a musttail call.
+def int_icall_branch_funnel : Intrinsic<[], [llvm_vararg_ty], []>;
+
 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                  [IntrReadMem, IntrArgMemOnly]>;
 
Index: llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
+++ llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
@@ -656,8 +656,11 @@
 
 struct WholeProgramDevirtResolution {
   enum Kind {
-    Indir,      ///< Just do a regular virtual call
-    SingleImpl, ///< Single implementation devirtualization
+    Indir,        ///< Just do a regular virtual call
+    SingleImpl,   ///< Single implementation devirtualization
+    BranchFunnel, ///< When retpoline mitigation is enabled, use a branch funnel
+                  ///< that is defined in the merged module. Otherwise same as
+                  ///< Indir.
   } TheKind = Indir;
 
   std::string SingleImplName;
 
Index: llvm/trunk/include/llvm/IR/ModuleSummaryIndexYAML.h
===================================================================
--- llvm/trunk/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ llvm/trunk/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -98,6 +98,8 @@
   static void enumeration(IO &io, WholeProgramDevirtResolution::Kind &value) {
     io.enumCase(value, "Indir", WholeProgramDevirtResolution::Indir);
     io.enumCase(value, "SingleImpl", WholeProgramDevirtResolution::SingleImpl);
+    io.enumCase(value, "BranchFunnel",
+                WholeProgramDevirtResolution::BranchFunnel);
   }
 };
 
Index: llvm/trunk/include/llvm/Target/Target.td
===================================================================
--- llvm/trunk/include/llvm/Target/Target.td
+++ llvm/trunk/include/llvm/Target/Target.td
@@ -1140,6 +1140,12 @@
   let mayStore = 1;
   let hasSideEffects = 1;
 }
+def ICALL_BRANCH_FUNNEL : StandardPseudoInstruction {
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins variable_ops);
+  let AsmString = "";
+  let hasSideEffects = 1;
+}
 
 // Generic opcodes used in GlobalISel.
 include "llvm/Target/GenericOpcodes.td"
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6043,6 +6043,60 @@
   case Intrinsic::experimental_vector_reduce_fmin:
     visitVectorReduce(I, Intrinsic);
     return nullptr;
+
+  case Intrinsic::icall_branch_funnel: {
+    SmallVector<SDValue, 16> Ops;
+    Ops.push_back(DAG.getRoot());
+    Ops.push_back(getValue(I.getArgOperand(0)));
+
+    int64_t Offset;
+    auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+        I.getArgOperand(1), Offset, DAG.getDataLayout()));
+    if (!Base)
+      report_fatal_error(
+          "llvm.icall.branch.funnel operand must be a GlobalValue");
+    Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
+
+    struct BranchFunnelTarget {
+      int64_t Offset;
+      SDValue Target;
+    };
+    SmallVector<BranchFunnelTarget, 8> Targets;
+
+    for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+      auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+          I.getArgOperand(Op), Offset, DAG.getDataLayout()));
+      if (ElemBase != Base)
+        report_fatal_error("all llvm.icall.branch.funnel operands must refer "
+                           "to the same GlobalValue");
+
+      SDValue Val = getValue(I.getArgOperand(Op + 1));
+      auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
+      if (!GA)
+        report_fatal_error(
+            "llvm.icall.branch.funnel operand must be a GlobalValue");
+      Targets.push_back({Offset, DAG.getTargetGlobalAddress(
+                                     GA->getGlobal(), getCurSDLoc(),
+                                     Val.getValueType(), GA->getOffset())});
+    }
+    std::sort(Targets.begin(), Targets.end(),
+              [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
+                return T1.Offset < T2.Offset;
+              });
+
+    for (auto &T : Targets) {
+      Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
+      Ops.push_back(T.Target);
+    }
+
+    SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
+                                 getCurSDLoc(), MVT::Other, Ops),
+              0);
+    DAG.setRoot(N);
+    setValue(&I, N);
+    HasTailCall = true;
+    return nullptr;
+  }
   }
 }
Index: llvm/trunk/lib/IR/Verifier.cpp
===================================================================
--- llvm/trunk/lib/IR/Verifier.cpp
+++ llvm/trunk/lib/IR/Verifier.cpp
@@ -2864,17 +2864,20 @@
   Function *F = CI.getParent()->getParent();
   FunctionType *CallerTy = F->getFunctionType();
   FunctionType *CalleeTy = CI.getFunctionType();
-  Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
-         "cannot guarantee tail call due to mismatched parameter counts", &CI);
+  if (!CI.getCalledFunction() || !CI.getCalledFunction()->isIntrinsic()) {
+    Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
+           "cannot guarantee tail call due to mismatched parameter counts",
+           &CI);
+    for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+      Assert(
+          isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
+          "cannot guarantee tail call due to mismatched parameter types", &CI);
+    }
+  }
   Assert(CallerTy->isVarArg() == CalleeTy->isVarArg(),
          "cannot guarantee tail call due to mismatched varargs", &CI);
   Assert(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
          "cannot guarantee tail call due to mismatched return types", &CI);
-  for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
-    Assert(
-        isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
-        "cannot guarantee tail call due to mismatched parameter types", &CI);
-  }
 
   // - The calling conventions of the caller and callee must match.
   Assert(F->getCallingConv() == CI.getCallingConv(),
Index: llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp
+++ llvm/trunk/lib/Target/X86/X86ExpandPseudo.cpp
@@ -59,12 +59,112 @@
   }
 
 private:
+  void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator MBBI);
+
   bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool ExpandMBB(MachineBasicBlock &MBB);
 };
 char X86ExpandPseudo::ID = 0;
 } // End anonymous namespace.
 
+void X86ExpandPseudo::ExpandICallBranchFunnel(
+    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
+  MachineBasicBlock *JTMBB = MBB;
+  MachineInstr *JTInst = &*MBBI;
+  MachineFunction *MF = MBB->getParent();
+  const BasicBlock *BB = MBB->getBasicBlock();
+  auto InsPt = MachineFunction::iterator(MBB);
+  ++InsPt;
+
+  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
+  DebugLoc DL = JTInst->getDebugLoc();
+  MachineOperand Selector = JTInst->getOperand(0);
+  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();
+
+  auto CmpTarget = [&](unsigned Target) {
+    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
+        .addReg(X86::RIP)
+        .addImm(1)
+        .addReg(0)
+        .addGlobalAddress(CombinedGlobal,
+                          JTInst->getOperand(2 + 2 * Target).getImm())
+        .addReg(0);
+    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
+        .add(Selector)
+        .addReg(X86::R11);
+  };
+
+  auto CreateMBB = [&]() {
+    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
+    MBB->addSuccessor(NewMBB);
+    return NewMBB;
+  };
+
+  auto EmitCondJump = [&](unsigned Opcode, MachineBasicBlock *ThenMBB) {
+    BuildMI(*MBB, MBBI, DL, TII->get(Opcode)).addMBB(ThenMBB);
+
+    auto *ElseMBB = CreateMBB();
+    MF->insert(InsPt, ElseMBB);
+    MBB = ElseMBB;
+    MBBI = MBB->end();
+  };
+
+  auto EmitCondJumpTarget = [&](unsigned Opcode, unsigned Target) {
+    auto *ThenMBB = CreateMBB();
+    TargetMBBs.push_back({ThenMBB, Target});
+    EmitCondJump(Opcode, ThenMBB);
+  };
+
+  auto EmitTailCall = [&](unsigned Target) {
+    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
+        .add(JTInst->getOperand(3 + 2 * Target));
+  };
+
+  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
+      [&](unsigned FirstTarget, unsigned NumTargets) {
+    if (NumTargets == 1) {
+      EmitTailCall(FirstTarget);
+      return;
+    }
+
+    if (NumTargets == 2) {
+      CmpTarget(FirstTarget + 1);
+      EmitCondJumpTarget(X86::JB_1, FirstTarget);
+      EmitTailCall(FirstTarget + 1);
+      return;
+    }
+
+    if (NumTargets < 6) {
+      CmpTarget(FirstTarget + 1);
+      EmitCondJumpTarget(X86::JB_1, FirstTarget);
+      EmitCondJumpTarget(X86::JE_1, FirstTarget + 1);
+      EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
+      return;
+    }
+
+    auto *ThenMBB = CreateMBB();
+    CmpTarget(FirstTarget + (NumTargets / 2));
+    EmitCondJump(X86::JB_1, ThenMBB);
+    EmitCondJumpTarget(X86::JE_1, FirstTarget + (NumTargets / 2));
+    EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
+                     NumTargets - (NumTargets / 2) - 1);
+
+    MF->insert(InsPt, ThenMBB);
+    MBB = ThenMBB;
+    MBBI = MBB->end();
+    EmitBranchFunnel(FirstTarget, NumTargets / 2);
+  };
+
+  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
+  for (auto P : TargetMBBs) {
+    MF->insert(InsPt, P.first);
+    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
+        .add(JTInst->getOperand(3 + 2 * P.second));
+  }
+  JTMBB->erase(JTInst);
+}
+
 /// If \p MBBI is a pseudo instruction, this method expands
 /// it to the corresponding (sequence of) actual instruction(s).
 /// \returns true if \p MBBI has been expanded.
@@ -259,6 +359,9 @@
     MBBI->eraseFromParent();
     return true;
   }
+  case TargetOpcode::ICALL_BRANCH_FUNNEL:
+    ExpandICallBranchFunnel(&MBB, MBBI);
+    return true;
   }
   llvm_unreachable("Previous switch has a fallthrough?");
 }
Index: llvm/trunk/lib/Transforms/IPO/LowerTypeTests.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/LowerTypeTests.cpp
+++ llvm/trunk/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 //
 // This pass lowers type metadata and calls to the llvm.type.test intrinsic.
+// It also ensures that globals are properly laid out for the
+// llvm.icall.branch.funnel intrinsic.
 // See http://llvm.org/docs/TypeMetadata.html for more information.
 //
 //===----------------------------------------------------------------------===//
 
@@ -25,6 +27,7 @@
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -291,6 +294,29 @@
   }
 };
 
+struct ICallBranchFunnel final
+    : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> {
+  static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI,
+                                   ArrayRef<GlobalTypeMember *> Targets) {
+    auto *Call = static_cast<ICallBranchFunnel *>(
+        Alloc.Allocate(totalSizeToAlloc<GlobalTypeMember *>(Targets.size()),
+                       alignof(ICallBranchFunnel)));
+    Call->CI = CI;
+    Call->NTargets = Targets.size();
+    std::uninitialized_copy(Targets.begin(), Targets.end(),
+                            Call->getTrailingObjects<GlobalTypeMember *>());
+    return Call;
+  }
+
+  CallInst *CI;
+  ArrayRef<GlobalTypeMember *> targets() const {
+    return makeArrayRef(getTrailingObjects<GlobalTypeMember *>(), NTargets);
+  }
+
+private:
+  size_t NTargets;
+};
+
 class LowerTypeTestsModule {
   Module &M;
 
@@ -372,6 +398,7 @@
                      const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
   Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
                            const TypeIdLowering &TIL);
+
   void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                        ArrayRef<GlobalTypeMember *> Globals);
   unsigned getJumpTableEntrySize();
@@ -383,11 +410,13 @@
   void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
                                  ArrayRef<GlobalTypeMember *> Functions);
   void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
-                                       ArrayRef<GlobalTypeMember *> Functions);
+                                       ArrayRef<GlobalTypeMember *> Functions);
   void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
                                      ArrayRef<GlobalTypeMember *> Functions);
-  void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
-                                   ArrayRef<GlobalTypeMember *> Globals);
+  void
+  buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
+                              ArrayRef<GlobalTypeMember *> Globals,
+                              ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);
   void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT);
   void moveInitializerToModuleConstructor(GlobalVariable *GV);
@@ -1462,7 +1491,8 @@
 }
 
 void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
-    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
+    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals,
+    ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) {
   DenseMap<Metadata *, unsigned> TypeIdIndices;
   for (unsigned I = 0; I != TypeIds.size(); ++I)
     TypeIdIndices[TypeIds[I]] = I;
@@ -1471,15 +1501,25 @@
   // the type identifier.
   std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
   unsigned GlobalIndex = 0;
+  DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices;
   for (GlobalTypeMember *GTM : Globals) {
     for (MDNode *Type : GTM->types()) {
       // Type = { offset, type identifier }
-      unsigned TypeIdIndex = TypeIdIndices[Type->getOperand(1)];
-      TypeMembers[TypeIdIndex].insert(GlobalIndex);
+      auto I = TypeIdIndices.find(Type->getOperand(1));
+      if (I != TypeIdIndices.end())
+        TypeMembers[I->second].insert(GlobalIndex);
     }
+    GlobalIndices[GTM] = GlobalIndex;
     GlobalIndex++;
   }
 
+  for (ICallBranchFunnel *JT : ICallBranchFunnels) {
+    TypeMembers.emplace_back();
+    std::set<uint64_t> &TMSet = TypeMembers.back();
+    for (GlobalTypeMember *T : JT->targets())
+      TMSet.insert(GlobalIndices[T]);
+  }
+
   // Order the sets of indices by size. The GlobalLayoutBuilder works best
   // when given small index sets first.
   std::stable_sort(
@@ -1567,8 +1607,11 @@
 bool LowerTypeTestsModule::lower() {
   Function *TypeTestFunc =
       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
-  if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary &&
-      !ImportSummary)
+  Function *ICallBranchFunnelFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
+  if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
+      (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) &&
+      !ExportSummary && !ImportSummary)
     return false;
 
   if (ImportSummary) {
@@ -1580,6 +1623,10 @@
       }
     }
 
+    if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
+      report_fatal_error(
+          "unexpected call to llvm.icall.branch.funnel during import phase");
+
     SmallVector<Function *, 8> Defs;
     SmallVector<Function *, 8> Decls;
     for (auto &F : M) {
@@ -1604,8 +1651,8 @@
   // Equivalence class set containing type identifiers and the globals that
   // reference them. This is used to partition the set of type identifiers in
   // the module into disjoint sets.
-  using GlobalClassesTy =
-      EquivalenceClasses<PointerUnion<GlobalTypeMember *, Metadata *>>;
+  using GlobalClassesTy = EquivalenceClasses<
+      PointerUnion3<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
   GlobalClassesTy GlobalClasses;
 
   // Verify the type metadata and build a few data structures to let us
@@ -1688,14 +1735,13 @@
     }
   }
 
+  DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers;
   for (GlobalObject &GO : M.global_objects()) {
     if (isa<Function>(GO) && GO.isDeclarationForLinker())
       continue;
 
     Types.clear();
     GO.getMetadata(LLVMContext::MD_type, Types);
-    if (Types.empty())
-      continue;
 
     bool IsDefinition = !GO.isDeclarationForLinker();
     bool IsExported = false;
@@ -1706,6 +1752,7 @@
 
     auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported,
                                          Types);
+    GlobalTypeMembers[&GO] = GTM;
    for (MDNode *Type : Types) {
       verifyTypeMDNode(&GO, Type);
       auto &Info = TypeIdInfo[Type->getOperand(1)];
@@ -1746,6 +1793,43 @@
     }
   }
 
+  if (ICallBranchFunnelFunc) {
+    for (const Use &U : ICallBranchFunnelFunc->uses()) {
+      if (Arch != Triple::x86_64)
+        report_fatal_error(
+            "llvm.icall.branch.funnel not supported on this target");
+
+      auto CI = cast<CallInst>(U.getUser());
+
+      std::vector<GlobalTypeMember *> Targets;
+      if (CI->getNumArgOperands() % 2 != 1)
+        report_fatal_error("number of arguments should be odd");
+
+      GlobalClassesTy::member_iterator CurSet;
+      for (unsigned I = 1; I != CI->getNumArgOperands(); I += 2) {
+        int64_t Offset;
+        auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+            CI->getOperand(I), Offset, M.getDataLayout()));
+        if (!Base)
+          report_fatal_error(
+              "Expected branch funnel operand to be global value");
+
+        GlobalTypeMember *GTM = GlobalTypeMembers[Base];
+        Targets.push_back(GTM);
+        GlobalClassesTy::member_iterator NewSet =
+            GlobalClasses.findLeader(GlobalClasses.insert(GTM));
+        if (I == 1)
+          CurSet = NewSet;
+        else
+          CurSet = GlobalClasses.unionSets(CurSet, NewSet);
+      }
+
+      GlobalClasses.unionSets(
+          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(
+                      ICallBranchFunnel::create(Alloc, CI, Targets))));
+    }
+  }
+
   if (ExportSummary) {
     DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
     for (auto &P : TypeIdInfo) {
@@ -1798,13 +1882,16 @@
     // Build the list of type identifiers in this disjoint set.
     std::vector<Metadata *> TypeIds;
     std::vector<GlobalTypeMember *> Globals;
+    std::vector<ICallBranchFunnel *> ICallBranchFunnels;
     for (GlobalClassesTy::member_iterator MI =
              GlobalClasses.member_begin(S.first);
          MI != GlobalClasses.member_end(); ++MI) {
-      if ((*MI).is<Metadata *>())
+      if (MI->is<Metadata *>())
         TypeIds.push_back(MI->get<Metadata *>());
-      else
+      else if (MI->is<GlobalTypeMember *>())
         Globals.push_back(MI->get<GlobalTypeMember *>());
+      else
+        ICallBranchFunnels.push_back(MI->get<ICallBranchFunnel *>());
     }
 
     // Order type identifiers by global index for determinism. This ordering is
@@ -1814,7 +1901,7 @@
     });
 
     // Build bitsets for this disjoint set.
-    buildBitSetsFromDisjointSet(TypeIds, Globals);
+    buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
   }
 
   allocateByteArrays();
Index: llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -316,12 +316,17 @@
   /// cases we are directly operating on the call sites at the IR level.
   std::vector<VirtualCallSite> CallSites;
 
+  /// Whether all call sites represented by this CallSiteInfo, including those
+  /// in summaries, have been devirtualized. This starts off as true because a
+  /// default constructed CallSiteInfo represents no call sites.
+  bool AllCallSitesDevirted = true;
+
   // These fields are used during the export phase of ThinLTO and reflect
   // information collected from function summaries.
 
   /// Whether any function summary contains an llvm.assume(llvm.type.test) for
   /// this slot.
-  bool SummaryHasTypeTestAssumeUsers;
+  bool SummaryHasTypeTestAssumeUsers = false;
 
   /// CFI-specific: a vector containing the list of function summaries that use
   /// the llvm.type.checked.load intrinsic and therefore will require
@@ -337,8 +342,22 @@
            !SummaryTypeCheckedLoadUsers.empty();
   }
 
-  /// As explained in the comment for SummaryTypeCheckedLoadUsers.
-  void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); }
+  void markSummaryHasTypeTestAssumeUsers() {
+    SummaryHasTypeTestAssumeUsers = true;
+    AllCallSitesDevirted = false;
+  }
+
+  void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
+    SummaryTypeCheckedLoadUsers.push_back(FS);
+    AllCallSitesDevirted = false;
+  }
+
+  void markDevirt() {
+    AllCallSitesDevirted = true;
+
+    // As explained in the comment for SummaryTypeCheckedLoadUsers.
+    SummaryTypeCheckedLoadUsers.clear();
+  }
 };
 
 // Call site information collected for a specific VTableSlot.
@@ -373,7 +392,9 @@
 
 void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
                                  unsigned *NumUnsafeUses) {
-  findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses});
+  auto &CSI = findCallSiteInfo(CS);
+  CSI.AllCallSitesDevirted = false;
+  CSI.CallSites.push_back({VTable, CS, NumUnsafeUses});
 }
 
 struct DevirtModule {
@@ -438,6 +459,12 @@
                            VTableSlotInfo &SlotInfo,
                            WholeProgramDevirtResolution *Res);
 
+  void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
+                              bool &IsExported);
+  void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+                            VTableSlotInfo &SlotInfo,
+                            WholeProgramDevirtResolution *Res, VTableSlot Slot);
+
   bool tryEvaluateFunctionsWithArgs(
       MutableArrayRef<VirtualCallTarget> TargetsForSlot,
      ArrayRef<ConstantInt *> Args);
@@ -471,6 +498,8 @@
                             StringRef Name, IntegerType *IntTy,
                             uint32_t Storage);
 
+  Constant *getMemberAddr(const TypeMemberInfo *M);
+
   void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
                             Constant *UniqueMemberAddr);
   bool tryUniqueRetValOpt(unsigned BitWidth,
@@ -726,10 +755,9 @@
       if (VCallSite.NumUnsafeUses)
         --*VCallSite.NumUnsafeUses;
     }
-    if (CSInfo.isExported()) {
+    if (CSInfo.isExported())
       IsExported = true;
-      CSInfo.markDevirt();
-    }
+    CSInfo.markDevirt();
   };
   Apply(SlotInfo.CSInfo);
   for (auto &P : SlotInfo.ConstCSInfo)
@@ -785,6 +813,134 @@
   return true;
 }
 
+void DevirtModule::tryICallBranchFunnel(
+    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+    WholeProgramDevirtResolution *Res, VTableSlot Slot) {
+  Triple T(M.getTargetTriple());
+  if (T.getArch() != Triple::x86_64)
+    return;
+
+  const unsigned kBranchFunnelThreshold = 10;
+  if (TargetsForSlot.size() > kBranchFunnelThreshold)
+    return;
+
+  bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
+  if (!HasNonDevirt)
+    for (auto &P : SlotInfo.ConstCSInfo)
+      if (!P.second.AllCallSitesDevirted) {
+        HasNonDevirt = true;
+        break;
+      }
+
+  if (!HasNonDevirt)
+    return;
+
+  FunctionType *FT =
+      FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
+  Function *JT;
+  if (isa<MDString>(Slot.TypeID)) {
+    JT = Function::Create(FT, Function::ExternalLinkage,
+                          getGlobalName(Slot, {}, "branch_funnel"), &M);
+    JT->setVisibility(GlobalValue::HiddenVisibility);
+  } else {
+    JT = Function::Create(FT, Function::InternalLinkage, "branch_funnel", &M);
+  }
+  JT->addAttribute(1, Attribute::Nest);
+
+  std::vector<Value *> JTArgs;
+  JTArgs.push_back(JT->arg_begin());
+  for (auto &T : TargetsForSlot) {
+    JTArgs.push_back(getMemberAddr(T.TM));
+    JTArgs.push_back(T.Fn);
+  }
+
+  BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
+  Constant *Intr =
+      Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
+
+  auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
+  CI->setTailCallKind(CallInst::TCK_MustTail);
+  ReturnInst::Create(M.getContext(), nullptr, BB);
+
+  bool IsExported = false;
+  applyICallBranchFunnel(SlotInfo, JT, IsExported);
+  if (IsExported)
+    Res->TheKind = WholeProgramDevirtResolution::BranchFunnel;
+}
+
+void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
+                                          Constant *JT, bool &IsExported) {
+  auto Apply = [&](CallSiteInfo &CSInfo) {
+    if (CSInfo.isExported())
+      IsExported = true;
+    if (CSInfo.AllCallSitesDevirted)
+      return;
+    for (auto &&VCallSite : CSInfo.CallSites) {
+      CallSite CS = VCallSite.CS;
+
+      // Jump tables are only profitable if the retpoline mitigation is enabled.
+      Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features");
+      if (FSAttr.hasAttribute(Attribute::None) ||
+          !FSAttr.getValueAsString().contains("+retpoline"))
+        continue;
+
+      if (RemarksEnabled)
+        VCallSite.emitRemark("branch-funnel", JT->getName(), OREGetter);
+
+      // Pass the address of the vtable in the nest register, which is r10 on
+      // x86_64.
+      std::vector<Type *> NewArgs;
+      NewArgs.push_back(Int8PtrTy);
+      for (Type *T : CS.getFunctionType()->params())
+        NewArgs.push_back(T);
+      PointerType *NewFT = PointerType::getUnqual(
+          FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs,
+                            CS.getFunctionType()->isVarArg()));
+
+      IRBuilder<> IRB(CS.getInstruction());
+      std::vector<Value *> Args;
+      Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
+      for (unsigned I = 0; I != CS.getNumArgOperands(); ++I)
+        Args.push_back(CS.getArgOperand(I));
+
+      CallSite NewCS;
+      if (CS.isCall())
+        NewCS = IRB.CreateCall(IRB.CreateBitCast(JT, NewFT), Args);
+      else
+        NewCS = IRB.CreateInvoke(
+            IRB.CreateBitCast(JT, NewFT),
+            cast<InvokeInst>(CS.getInstruction())->getNormalDest(),
+            cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args);
+      NewCS.setCallingConv(CS.getCallingConv());
+
+      AttributeList Attrs = CS.getAttributes();
+      std::vector<AttributeSet> NewArgAttrs;
+      NewArgAttrs.push_back(AttributeSet::get(
+          M.getContext(), ArrayRef<Attribute>{Attribute::get(
+                              M.getContext(), Attribute::Nest)}));
+      for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
+        NewArgAttrs.push_back(Attrs.getParamAttributes(I));
+      NewCS.setAttributes(
+          AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
+                             Attrs.getRetAttributes(), NewArgAttrs));
+
+      CS->replaceAllUsesWith(NewCS.getInstruction());
+      CS->eraseFromParent();
+
+      // This use is no longer unsafe.
+      if (VCallSite.NumUnsafeUses)
+        --*VCallSite.NumUnsafeUses;
+    }
+    // Don't mark as devirtualized because there may be callers compiled without
+    // retpoline mitigation, which would mean that they are lowered to
+    // llvm.type.test and therefore require an llvm.type.test resolution for the
+    // type identifier.
+  };
+  Apply(SlotInfo.CSInfo);
+  for (auto &P : SlotInfo.ConstCSInfo)
+    Apply(P.second);
+}
+
 bool DevirtModule::tryEvaluateFunctionsWithArgs(
     MutableArrayRef<VirtualCallTarget> TargetsForSlot,
     ArrayRef<ConstantInt *> Args) {
@@ -937,6 +1093,12 @@
   CSInfo.markDevirt();
 }
 
+Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
+  Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
+  return ConstantExpr::getGetElementPtr(Int8Ty, C,
+                                        ConstantInt::get(Int64Ty, M->Offset));
+}
+
 bool DevirtModule::tryUniqueRetValOpt(
     unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
     CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
@@ -956,12 +1118,7 @@
       // checked for a uniform return value in tryUniformRetValOpt.
       assert(UniqueMember);
 
-      Constant *UniqueMemberAddr =
-          ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy);
-      UniqueMemberAddr = ConstantExpr::getGetElementPtr(
-          Int8Ty, UniqueMemberAddr,
-          ConstantInt::get(Int64Ty, UniqueMember->Offset));
-
+      Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
       if (CSInfo.isExported()) {
         Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
         Res->Info = IsOne;
@@ -1348,6 +1505,14 @@
       break;
     }
   }
+
+  if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
+    auto *JT = M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
+                                     Type::getVoidTy(M.getContext()));
+    bool IsExported = false;
+    applyICallBranchFunnel(SlotInfo, JT, IsExported);
+    assert(!IsExported);
+  }
 }
 
 void DevirtModule::removeRedundantTypeTests() {
@@ -1417,14 +1582,13 @@
         // FIXME: Only add live functions.
         for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
           for (Metadata *MD : MetadataByGUID[VF.GUID]) {
-            CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers =
-                true;
+            CallSlots[{MD, VF.Offset}]
+                .CSInfo.markSummaryHasTypeTestAssumeUsers();
           }
         }
         for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
           for (Metadata *MD : MetadataByGUID[VF.GUID]) {
-            CallSlots[{MD, VF.Offset}]
-                .CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS);
+            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
           }
         }
         for (const FunctionSummary::ConstVCall &VC :
@@ -1432,7 +1596,7 @@
           for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
             CallSlots[{MD, VC.VFunc.Offset}]
                 .ConstCSInfo[VC.Args]
-                .SummaryHasTypeTestAssumeUsers = true;
+                .markSummaryHasTypeTestAssumeUsers();
           }
         }
         for (const FunctionSummary::ConstVCall &VC :
@@ -1440,7 +1604,7 @@
           for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
             CallSlots[{MD, VC.VFunc.Offset}]
                 .ConstCSInfo[VC.Args]
-                .SummaryTypeCheckedLoadUsers.push_back(FS);
+                .addSummaryTypeCheckedLoadUser(FS);
           }
         }
       }
@@ -1464,9 +1628,12 @@
             cast<MDString>(S.first.TypeID)->getString())
             .WPDRes[S.first.ByteOffset];
 
-    if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) &&
-        tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first))
-      DidVirtualConstProp = true;
+    if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) {
+      DidVirtualConstProp |=
+          tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
+
+      tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
+    }
 
     // Collect functions devirtualized at least for one call site for stats.
     if (RemarksEnabled)
Index: llvm/trunk/test/CodeGen/X86/icall-branch-funnel.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/icall-branch-funnel.ll
+++ llvm/trunk/test/CodeGen/X86/icall-branch-funnel.ll
@@ -0,0 +1,170 @@
+; RUN: llc -mtriple=x86_64-unknown-linux < %s | FileCheck %s
+
+@g = external global i8
+
+declare void @f0()
+declare void @f1()
+declare void @f2()
+declare void @f3()
+declare void @f4()
+declare void @f5()
+declare void @f6()
+declare void @f7()
+declare void @f8()
+declare void @f9()
+
+declare void @llvm.icall.branch.funnel(...)
+
+define void @jt2(i8* nest, ...) {
+  ; CHECK: jt2:
+  ; CHECK:      leaq g+1(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB0_1
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f0
+  ; CHECK-NEXT: .LBB0_1:
+  ; CHECK-NEXT: jmp f1
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      i8* getelementptr (i8, i8* @g, i64 0), void ()* @f0,
+      i8* getelementptr (i8, i8* @g, i64 1), void ()* @f1,
+      ...
+  )
+  ret void
+}
+
+define void @jt3(i8* nest, ...) {
+  ; CHECK: jt3:
+  ; CHECK:      leaq g+1(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB1_1
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f0
+  ; CHECK-NEXT: .LBB1_1:
+  ; CHECK-NEXT: jne .LBB1_2
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f1
+  ; CHECK-NEXT: .LBB1_2:
+  ; CHECK-NEXT: jmp f2
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      i8* getelementptr (i8, i8* @g, i64 0), void ()* @f0,
+      i8* getelementptr (i8, i8* @g, i64 2), void ()* @f2,
+      i8* getelementptr (i8, i8* @g, i64 1), void ()* @f1,
+      ...
+  )
+  ret void
+}
+
+define void @jt7(i8* nest, ...) {
+  ; CHECK: jt7:
+  ; CHECK:      leaq g+3(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB2_1
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: leaq g+1(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB2_6
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f0
+  ; CHECK-NEXT: .LBB2_1:
+  ; CHECK-NEXT: jne .LBB2_2
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f3
+  ; CHECK-NEXT: .LBB2_6:
+  ; CHECK-NEXT: jne .LBB2_7
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f1
+  ; CHECK-NEXT: .LBB2_2:
+  ; CHECK-NEXT: leaq g+5(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB2_3
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f4
+  ; CHECK-NEXT: .LBB2_7:
+  ; CHECK-NEXT: jmp f2
+  ; CHECK-NEXT: .LBB2_3:
+  ; CHECK-NEXT: jne .LBB2_4
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f5
+  ; CHECK-NEXT: .LBB2_4:
+  ; CHECK-NEXT: jmp f6
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      i8* getelementptr (i8, i8* @g, i64 0), void ()* @f0,
+      i8* getelementptr (i8, i8* @g, i64 1), void ()* @f1,
+      i8* getelementptr (i8, i8* @g, i64 2), void ()* @f2,
+      i8* getelementptr (i8, i8* @g, i64 3), void ()* @f3,
+      i8* getelementptr (i8, i8* @g, i64 4), void ()* @f4,
+      i8* getelementptr (i8, i8* @g, i64 5), void ()* @f5,
+      i8* getelementptr (i8, i8* @g, i64 6), void ()* @f6,
+      ...
+  )
+  ret void
+}
+
+define void @jt10(i8* nest, ...) {
+  ; CHECK: jt10:
+  ; CHECK:      leaq g+5(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB3_1
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: leaq g+1(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB3_7
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f0
+  ; CHECK-NEXT: .LBB3_1:
+  ; CHECK-NEXT: jne .LBB3_2
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f5
+  ; CHECK-NEXT: .LBB3_7:
+  ; CHECK-NEXT: jne .LBB3_8
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f1
+  ; CHECK-NEXT: .LBB3_2:
+  ; CHECK-NEXT: leaq g+7(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB3_3
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f6
+  ; CHECK-NEXT: .LBB3_8:
+  ; CHECK-NEXT: leaq g+3(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB3_9
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f2
+  ; CHECK-NEXT: .LBB3_3:
+  ; CHECK-NEXT: jne .LBB3_4
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f7
+  ; CHECK-NEXT: .LBB3_9:
+  ; CHECK-NEXT: jne .LBB3_10
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f3
+  ; CHECK-NEXT: .LBB3_4:
+  ; CHECK-NEXT: leaq g+9(%rip), %r11
+  ; CHECK-NEXT: cmpq %r11, %r10
+  ; CHECK-NEXT: jae .LBB3_5
+  ; CHECK-NEXT: #
+  ; CHECK-NEXT: jmp f8
+  ; CHECK-NEXT: .LBB3_10:
+  ; CHECK-NEXT: jmp f4
+  ; CHECK-NEXT: .LBB3_5:
+  ; CHECK-NEXT: jmp f9
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      i8* getelementptr (i8, i8* @g, i64 0), void ()* @f0,
+      i8* getelementptr (i8, i8* @g, i64 1), void ()* @f1,
+      i8* getelementptr (i8, i8* @g, i64 2), void ()* @f2,
+      i8* getelementptr (i8, i8* @g, i64 3), void ()* @f3,
+      i8* getelementptr (i8, i8* @g, i64 4), void ()* @f4,
+      i8* getelementptr (i8, i8* @g, i64 5), void ()* @f5,
+      i8* getelementptr (i8, i8* @g, i64 6), void ()* @f6,
+      i8* getelementptr (i8, i8* @g, i64 7), void ()* @f7,
+      i8* getelementptr (i8, i8* @g, i64 8), void ()* @f8,
+      i8* getelementptr (i8, i8* @g, i64 9), void ()* @f9,
+      ...
+  )
+  ret void
+}
Index: llvm/trunk/test/Transforms/LowerTypeTests/icall-branch-funnel.ll
===================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/icall-branch-funnel.ll
+++ llvm/trunk/test/Transforms/LowerTypeTests/icall-branch-funnel.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux"
+
+; CHECK: @0 = private constant { i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 2 }
+; CHECK: @f1 = alias void (), void ()* @.cfi.jumptable
+; CHECK: @f2 = alias void (), bitcast ([8 x i8]* getelementptr inbounds ([2 x [8 x i8]], [2 x [8 x i8]]* bitcast (void ()* @.cfi.jumptable to [2 x [8 x i8]]*), i64 0, i64 1) to void ()*)
+; CHECK: @g1 = alias i32, getelementptr inbounds ({ i32, [0 x i8], i32 }, { i32, [0 x i8], i32 }* @0, i32 0, i32 0)
+; CHECK: @g2 = alias i32, getelementptr inbounds ({ i32, [0 x i8], i32 }, { i32, [0 x i8], i32 }* @0, i32 0, i32 2)
+
+@g1 = constant i32 1
+@g2 = constant i32 2
+
+define void @f1() {
+  ret void
+}
+
+define void @f2() {
+  ret void
+}
+
+declare void @g1f()
+declare void @g2f()
+
+define void @jt2(i8* nest, ...) {
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      i32* @g1, void ()* @g1f,
+      i32* @g2, void ()* @g2f,
+      ...
+  )
+  ret void
+}
+
+define void @jt3(i8* nest, ...) {
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      void ()* @f1, void ()* @f1,
+      void ()* @f2, void ()* @f2,
+      ...
+  )
+  ret void
+}
+
+declare void @llvm.icall.branch.funnel(...)
Index: llvm/trunk/test/Transforms/WholeProgramDevirt/Inputs/import-branch-funnel.yaml
===================================================================
--- llvm/trunk/test/Transforms/WholeProgramDevirt/Inputs/import-branch-funnel.yaml
+++ llvm/trunk/test/Transforms/WholeProgramDevirt/Inputs/import-branch-funnel.yaml
@@ -0,0 +1,11 @@
+---
+TypeIdMap:
+  typeid1:
+    WPDRes:
+      0:
+        Kind: BranchFunnel
+  typeid2:
+    WPDRes:
+      8:
+        Kind: BranchFunnel
+...
Index: llvm/trunk/test/Transforms/WholeProgramDevirt/Inputs/import-vcp-branch-funnel.yaml
===================================================================
--- llvm/trunk/test/Transforms/WholeProgramDevirt/Inputs/import-vcp-branch-funnel.yaml
+++ llvm/trunk/test/Transforms/WholeProgramDevirt/Inputs/import-vcp-branch-funnel.yaml
@@ -0,0 +1,23 @@
+---
+TypeIdMap:
+  typeid1:
+    WPDRes:
+      0:
+        Kind: BranchFunnel
+        ResByArg:
+          1:
+            Kind: VirtualConstProp
+            Info: 0
+            Byte: 42
+            Bit: 0
+  typeid2:
+    WPDRes:
+      8:
+        Kind: BranchFunnel
+        ResByArg:
+          3:
+            Kind: VirtualConstProp
+            Info: 0
+            Byte: 43
+            Bit: 128
+...
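[Note, not part of the patch: how a `Kind: BranchFunnel` summary entry like the ones above is consumed. During the ThinLTO import phase, DevirtModule::importResolution declares the funnel under its type-id-derived name via getOrInsertFunction, and applyICallBranchFunnel retargets each retpoline-enabled call site at that declaration, passing the vtable address in the nest parameter. A minimal sketch using the names from the import.ll test at the end of this patch — an original call `%result = call i32 %fptr_casted(i8* %obj, i32 1)` against typeid1 at offset 0 becomes roughly:

  declare void @__typeid_typeid1_0_branch_funnel()

  ; The vtable pointer rides in the nest parameter (r10 on x86-64);
  ; the original arguments follow unchanged.
  %result = call i32 bitcast (void ()* @__typeid_typeid1_0_branch_funnel
                to i32 (i8*, i8*, i32)*)(i8* nest %vtablei8, i8* %obj, i32 1)

The slot deliberately stays "not devirtualized" in the summary sense: callers built without +retpoline still lower through llvm.type.test and need a type-test resolution for the type identifier.]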
Index: llvm/trunk/test/Transforms/WholeProgramDevirt/branch-funnel.ll
===================================================================
--- llvm/trunk/test/Transforms/WholeProgramDevirt/branch-funnel.ll
+++ llvm/trunk/test/Transforms/WholeProgramDevirt/branch-funnel.ll
@@ -0,0 +1,155 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck --check-prefixes=CHECK,RETP %s
+; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt | FileCheck --check-prefixes=CHECK,NORETP %s
+; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+; SUMMARY:      TypeIdMap:
+; SUMMARY-NEXT:   typeid1:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind: Unsat
+; SUMMARY-NEXT:       SizeM1BitWidth: 0
+; SUMMARY-NEXT:       AlignLog2: 0
+; SUMMARY-NEXT:       SizeM1: 0
+; SUMMARY-NEXT:       BitMask: 0
+; SUMMARY-NEXT:       InlineBits: 0
+; SUMMARY-NEXT:     WPDRes:
+; SUMMARY-NEXT:       0:
+; SUMMARY-NEXT:         Kind: BranchFunnel
+; SUMMARY-NEXT:         SingleImplName: ''
+; SUMMARY-NEXT:         ResByArg:
+; SUMMARY-NEXT:   typeid2:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind: Unsat
+; SUMMARY-NEXT:       SizeM1BitWidth: 0
+; SUMMARY-NEXT:       AlignLog2: 0
+; SUMMARY-NEXT:       SizeM1: 0
+; SUMMARY-NEXT:       BitMask: 0
+; SUMMARY-NEXT:       InlineBits: 0
+; SUMMARY-NEXT:     WPDRes:
+; SUMMARY-NEXT:       0:
+; SUMMARY-NEXT:         Kind: Indir
+; SUMMARY-NEXT:         SingleImplName: ''
+; SUMMARY-NEXT:         ResByArg:
+; SUMMARY-NEXT:   typeid3:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind: Unsat
+; SUMMARY-NEXT:       SizeM1BitWidth: 0
+; SUMMARY-NEXT:       AlignLog2: 0
+; SUMMARY-NEXT:       SizeM1: 0
+; SUMMARY-NEXT:       BitMask: 0
+; SUMMARY-NEXT:       InlineBits: 0
+; SUMMARY-NEXT:     WPDRes:
+; SUMMARY-NEXT:       0:
+; SUMMARY-NEXT:         Kind: BranchFunnel
+; SUMMARY-NEXT:         SingleImplName: ''
+; SUMMARY-NEXT:         ResByArg:
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1_1 to i8*)], !type !0
+@vt1_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1_2 to i8*)], !type !0
+
+declare i32 @vf1_1(i8* %this, i32 %arg)
+declare i32 @vf1_2(i8* %this, i32 %arg)
+
+@vt2_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_1 to i8*)], !type !1
+@vt2_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_2 to i8*)], !type !1
+@vt2_3 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_3 to i8*)], !type !1
+@vt2_4 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_4 to i8*)], !type !1
+@vt2_5 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_5 to i8*)], !type !1
+@vt2_6 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_6 to i8*)], !type !1
+@vt2_7 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_7 to i8*)], !type !1
+@vt2_8 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_8 to i8*)], !type !1
+@vt2_9 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_9 to i8*)], !type !1
+@vt2_10 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_10 to i8*)], !type !1
+@vt2_11 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2_11 to i8*)], !type !1
+
+declare i32 @vf2_1(i8* %this, i32 %arg)
+declare i32 @vf2_2(i8* %this, i32 %arg)
+declare i32 @vf2_3(i8* %this, i32 %arg)
+declare i32 @vf2_4(i8* %this, i32 %arg)
+declare i32 @vf2_5(i8* %this, i32 %arg)
+declare i32 @vf2_6(i8* %this, i32 %arg)
+declare i32 @vf2_7(i8* %this, i32 %arg)
+declare i32 @vf2_8(i8* %this, i32 %arg)
+declare i32 @vf2_9(i8* %this, i32 %arg)
+declare i32 @vf2_10(i8* %this, i32 %arg)
+declare i32 @vf2_11(i8* %this, i32 %arg)
+
+@vt3_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf3_1 to i8*)], !type !2
+@vt3_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf3_2 to i8*)], !type !2
+
+declare i32 @vf3_1(i8* %this, i32 %arg)
+declare i32 @vf3_2(i8* %this, i32 %arg)
+
+@vt4_1 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf4_1 to i8*)], !type !3
+@vt4_2 = constant [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf4_2 to i8*)], !type !3
+
+declare i32 @vf4_1(i8* %this, i32 %arg)
+declare i32 @vf4_2(i8* %this, i32 %arg)
+
+; CHECK: define i32 @fn1
+define i32 @fn1(i8* %obj) #0 {
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid1")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+  ; RETP: {{.*}} = bitcast {{.*}} to i8*
+  ; RETP: [[VT1:%.*]] = bitcast {{.*}} to i8*
+  ; RETP: call i32 bitcast (void (i8*, ...)* @__typeid_typeid1_0_branch_funnel to i32 (i8*, i8*, i32)*)(i8* nest [[VT1]], i8* %obj, i32 1)
+  %result = call i32 %fptr_casted(i8* %obj, i32 1)
+  ; NORETP: call i32 %
+  ret i32 %result
+}
+
+; CHECK: define i32 @fn2
+define i32 @fn2(i8* %obj) #0 {
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid2")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+  ; CHECK: call i32 %
+  %result = call i32 %fptr_casted(i8* %obj, i32 1)
+  ret i32 %result
+}
+
+; CHECK: define i32 @fn3
+define i32 @fn3(i8* %obj) #0 {
+  %vtableptr = bitcast i8* %obj to [1 x i8*]**
+  %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+  %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !4)
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+  %fptr = load i8*, i8** %fptrptr
+  %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+  ; RETP: call i32 bitcast (void (i8*, ...)* @branch_funnel to
+  ; NORETP: call i32 %
+  %result = call i32 %fptr_casted(i8* %obj, i32 1)
+  ret i32 %result
+}
+
+; CHECK: define internal void @branch_funnel(i8* nest, ...)
+
+; CHECK: define hidden void @__typeid_typeid1_0_branch_funnel(i8* nest, ...)
+; CHECK-NEXT: call void (...) @llvm.icall.branch.funnel(i8* %0, i8* bitcast ([1 x i8*]* @vt1_1 to i8*), i32 (i8*, i32)* @vf1_1, i8* bitcast ([1 x i8*]* @vt1_2 to i8*), i32 (i8*, i32)* @vf1_2, ...)
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+!2 = !{i32 0, !"typeid3"}
+!3 = !{i32 0, !4}
+!4 = distinct !{}
+
+attributes #0 = { "target-features"="+retpoline" }
Index: llvm/trunk/test/Transforms/WholeProgramDevirt/import.ll
===================================================================
--- llvm/trunk/test/Transforms/WholeProgramDevirt/import.ll
+++ llvm/trunk/test/Transforms/WholeProgramDevirt/import.ll
@@ -1,10 +1,12 @@
 ; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-single-impl.yaml < %s | FileCheck --check-prefixes=CHECK,SINGLE-IMPL %s
-; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-uniform-ret-val.yaml < %s | FileCheck --check-prefixes=CHECK,UNIFORM-RET-VAL %s
-; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-unique-ret-val0.yaml < %s | FileCheck --check-prefixes=CHECK,UNIQUE-RET-VAL0 %s
-; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-unique-ret-val1.yaml < %s | FileCheck --check-prefixes=CHECK,UNIQUE-RET-VAL1 %s
-; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-vcp.yaml < %s | FileCheck --check-prefixes=CHECK,VCP,VCP-X86,VCP64 %s
+; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-uniform-ret-val.yaml < %s | FileCheck --check-prefixes=CHECK,INDIR,UNIFORM-RET-VAL %s
+; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-unique-ret-val0.yaml < %s | FileCheck --check-prefixes=CHECK,INDIR,UNIQUE-RET-VAL0 %s
+; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-unique-ret-val1.yaml < %s | FileCheck --check-prefixes=CHECK,INDIR,UNIQUE-RET-VAL1 %s
+; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-vcp.yaml < %s | FileCheck --check-prefixes=CHECK,VCP,VCP-X86,VCP64,INDIR %s
 ; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-vcp.yaml -mtriple=i686-unknown-linux -data-layout=e-p:32:32 < %s | FileCheck --check-prefixes=CHECK,VCP,VCP-X86,VCP32 %s
 ; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-vcp.yaml -mtriple=armv7-unknown-linux -data-layout=e-p:32:32 < %s | FileCheck --check-prefixes=CHECK,VCP,VCP-ARM %s
+; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-vcp-branch-funnel.yaml < %s | FileCheck --check-prefixes=CHECK,VCP,VCP-X86,VCP64,BRANCH-FUNNEL %s
+; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import -wholeprogramdevirt-read-summary=%S/Inputs/import-branch-funnel.yaml < %s | FileCheck --check-prefixes=CHECK,BRANCH-FUNNEL,BRANCH-FUNNEL-NOVCP %s
 
 target datalayout = "e-p:64:64"
 target triple = "x86_64-unknown-linux-gnu"
@@ -18,7 +20,7 @@
 ; constant propagation.
 
 ; CHECK: define i32 @call1
-define i32 @call1(i8* %obj) {
+define i32 @call1(i8* %obj) #0 {
   %vtableptr = bitcast i8* %obj to [3 x i8*]**
   %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
   %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
@@ -27,16 +29,18 @@
   %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 0
   %fptr = load i8*, i8** %fptrptr
   %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+  ; CHECK: {{.*}} = bitcast {{.*}} to i8*
+  ; VCP: [[VT1:%.*]] = bitcast {{.*}} to i8*
   ; SINGLE-IMPL: call i32 bitcast (void ()* @singleimpl1 to i32 (i8*, i32)*)
   %result = call i32 %fptr_casted(i8* %obj, i32 1)
   ; UNIFORM-RET-VAL: ret i32 42
-  ; VCP: {{.*}} = bitcast {{.*}} to i8*
-  ; VCP: [[VT1:%.*]] = bitcast {{.*}} to i8*
   ; VCP-X86: [[GEP1:%.*]] = getelementptr i8, i8* [[VT1]], i32 ptrtoint (i8* @__typeid_typeid1_0_1_byte to i32)
   ; VCP-ARM: [[GEP1:%.*]] = getelementptr i8, i8* [[VT1]], i32 42
   ; VCP: [[BC1:%.*]] = bitcast i8* [[GEP1]] to i32*
   ; VCP: [[LOAD1:%.*]] = load i32, i32* [[BC1]]
   ; VCP: ret i32 [[LOAD1]]
+  ; BRANCH-FUNNEL-NOVCP: [[VT1:%.*]] = bitcast {{.*}} to i8*
+  ; BRANCH-FUNNEL-NOVCP: call i32 bitcast (void ()* @__typeid_typeid1_0_branch_funnel to i32 (i8*, i8*, i32)*)(i8* nest [[VT1]], i8* %obj, i32 1)
   ret i32 %result
 }
 
@@ -44,7 +48,8 @@
 ; constant propagation.
 
 ; CHECK: define i1 @call2
-define i1 @call2(i8* %obj) {
+define i1 @call2(i8* %obj) #0 {
+  ; BRANCH-FUNNEL: [[VT1:%.*]] = bitcast {{.*}} to i8*
   %vtableptr = bitcast i8* %obj to [1 x i8*]**
   %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
   %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
@@ -57,9 +62,8 @@
 cont:
   %fptr_casted = bitcast i8* %fptr to i1 (i8*, i32)*
   ; SINGLE-IMPL: call i1 bitcast (void ()* @singleimpl2 to i1 (i8*, i32)*)
-  ; UNIFORM-RET-VAL: call i1 %
-  ; UNIQUE-RET-VAL0: call i1 %
-  ; UNIQUE-RET-VAL1: call i1 %
+  ; INDIR: call i1 %
+  ; BRANCH-FUNNEL: call i1 bitcast (void ()* @__typeid_typeid2_8_branch_funnel to i1 (i8*, i8*, i32)*)(i8* nest [[VT1]], i8* %obj, i32 undef)
   %result = call i1 %fptr_casted(i8* %obj, i32 undef)
   ret i1 %result
 
@@ -69,7 +73,7 @@
 }
 
 ; CHECK: define i1 @call3
-define i1 @call3(i8* %obj) {
+define i1 @call3(i8* %obj) #0 {
   %vtableptr = bitcast i8* %obj to [1 x i8*]**
   %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
   %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
@@ -91,6 +95,8 @@
   ; VCP-ARM: [[AND2:%.*]] = and i8 [[LOAD2]], -128
   ; VCP: [[ICMP2:%.*]] = icmp ne i8 [[AND2]], 0
   ; VCP: ret i1 [[ICMP2]]
+  ; BRANCH-FUNNEL-NOVCP: [[VT2:%.*]] = bitcast {{.*}} to i8*
+  ; BRANCH-FUNNEL-NOVCP: call i1 bitcast (void ()* @__typeid_typeid2_8_branch_funnel to i1 (i8*, i8*, i32)*)(i8* nest [[VT2]], i8* %obj, i32 3)
   ret i1 %result
 
 trap:
@@ -111,3 +117,5 @@
 declare void @llvm.trap()
 declare {i8*, i1} @llvm.type.checked.load(i8*, i32, metadata)
 declare i1 @llvm.type.test(i8*, metadata)
+
+attributes #0 = { "target-features"="+retpoline" }
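[Note, not part of the patch: the producer side of the mechanism, assembled from the branch-funnel.ll CHECK lines above. In the full-LTO/export module, tryICallBranchFunnel emits the funnel as a varargs thunk whose body is a single musttail call to the intrinsic, pairing each target's vtable address with its implementation. LowerTypeTests then lays the listed vtables out contiguously, and the X86 expansion selects a target by comparing the incoming vtable address (in r10, the nest register) against those addresses using only direct branches, which is what keeps the construct compatible with the retpoline mitigation. A sketch of the generated funnel, using the names from that test:

  define hidden void @__typeid_typeid1_0_branch_funnel(i8* nest, ...) {
    musttail call void (...) @llvm.icall.branch.funnel(
        i8* %0,                                         ; vtable address (nest)
        i8* bitcast ([1 x i8*]* @vt1_1 to i8*), i32 (i8*, i32)* @vf1_1,
        i8* bitcast ([1 x i8*]* @vt1_2 to i8*), i32 (i8*, i32)* @vf1_2,
        ...)
    ret void
  }]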