This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Transforms/IPO/
-
Transforms/
-
IPO/
1/6
WholeProgramDevirt.cpp
-
test/Transforms/WholeProgramDevirt/
-
Transforms/
-
WholeProgramDevirt/
-
branch-funnel.ll

Differential D146267

[llvm] Handle duplicate call bases when applying branch funneling
ClosedPublic

Authored by leonardchan on Mar 16 2023, 4:50 PM.

Download Raw Diff

Details

Reviewers

tejohnson
pcc
phosek

Commits

rG53a917595186: [llvm] Handle duplicate call bases when applying branch funneling

Summary

It's possible to segfault in DevirtModule::applyICallBranchFunnel when attempting to call getCaller on a call base that was erased in a prior iteration. This can occur when attempting to find devirtualizable calls via findDevirtualizableCallsForTypeTest if the vtable passed to llvm.type.test is a global and not a local. The function works by taking the first argument of the llvm.type.test call (which is a vtable), iterating through all uses of it, and adding any relevant uses that are calls associated with that intrinsic call to a vector. For most cases where the vtable is actually a *local*, this wouldn't be an issue. Take for example:

define i32 @fn(ptr %obj) #0 {                                                                                                                                                                                                                                                            
  %vtable = load ptr, ptr %obj                                                                                                                                                                                                                                                            
  %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")                                                                                                                                                                                                                          
  call void @llvm.assume(i1 %p)                                                                                                                                                                                                                                                           
  %fptr = load ptr, ptr %vtable                                                                                                                                                                                                                                                           
  %result = call i32 %fptr(ptr %obj, i32 1)                                                                                                                                                                                                                                               
  ret i32 %result                                                                                                                                                                                                                                                                         
}

findDevirtualizableCallsForTypeTest will check the call base %result = call i32 %fptr(ptr %obj, i32 1), find that it is associated with a virtualizable call from %vtable, find all loads for %vtable, and add any instances where those load results are called to a vector. Now consider the case where instead %vtable was the global itself rather than a local:

define i32 @fn(ptr %obj) #0 {                                                                                                                                                                                                                                                            
  %p = call i1 @llvm.type.test(ptr @vtable, metadata !"typeid2")                                                                                                                                                                                                                          
  call void @llvm.assume(i1 %p)                                                                                                                                                                                                                                                           
  %fptr = load ptr, ptr @vtable                                                                                                                                                                                                                                                           
  %result = call i32 %fptr(ptr %obj, i32 1)                                                                                                                                                                                                                                               
  ret i32 %result                                                                                                                                                                                                                                                                         
}

findDevirtualizableCallsForTypeTest should work normally and add one unique call instance to a vector. However, if there are multiple instances where this same global is used for llvm.type.test, like with:

define i32 @fn(ptr %obj) #0 {                                                                                                                                                                                                                                                            
  %p = call i1 @llvm.type.test(ptr @vtable, metadata !"typeid2")                                                                                                                                                                                                                          
  call void @llvm.assume(i1 %p)                                                                                                                                                                                                                                                           
  %fptr = load ptr, ptr @vtable                                                                                                                                                                                                                                                           
  %result = call i32 %fptr(ptr %obj, i32 1)                                                                                                                                                                                                                                               
  ret i32 %result
}

define i32 @fn2(ptr %obj) #0 {                                                                                                                                                                                                                                                            
  %p = call i1 @llvm.type.test(ptr @vtable, metadata !"typeid2")                                                                                                                                                                                                                          
  call void @llvm.assume(i1 %p)                                                                                                                                                                                                                                                           
  %fptr = load ptr, ptr @vtable                                                                                                                                                                                                                                                           
  %result = call i32 %fptr(ptr %obj, i32 1)                                                                                                                                                                                                                                               
  ret i32 %result
}

Then each call base %result = call i32 %fptr(ptr %obj, i32 1) will be added to the vector twice. This is because for either call base %result = call i32 %fptr(ptr %obj, i32 1), we determine it is associated with a virtualizable call from @vtable, and then we iterate through all the uses of @vtable, which is used across multiple functions. So when scanning the first %result = call i32 %fptr(ptr %obj, i32 1), both call bases will be added to the vector, but when scanning the second one, both call bases are added again, resulting in duplicate call bases in the CSInfo.CallSites vector.

Note this is actually accounted for in every other instance where WPD iterates over CallSites. What everything else does is actually add the call base to the OptimizedCalls set and just check if it's already in the set. We can't reuse that particular set since it serves a different purpose: marking which calls were devirtualized, which applyICallBranchFunnel explicitly says it doesn't do. For this fix, we can just account for duplicates with a map and do the actual replacements afterwards by iterating over the map.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

leonardchan created this revision.Mar 16 2023, 4:50 PM

Herald added a project: Restricted Project. · View Herald TranscriptMar 16 2023, 4:50 PM

Herald added subscribers: ormris, hiraditya, Prazek. · View Herald Transcript

leonardchan requested review of this revision.Mar 16 2023, 4:50 PM

@tejohnson There was an old comment you left in D134320 asking why the check was warranted. We managed to reproduce the segfault and hopefully this description answers your initial comment.

Harbormaster completed remote builds in B219976: Diff 505948.Mar 16 2023, 5:51 PM

tejohnson added inline comments.Mar 17 2023, 10:01 AM

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
1399	I don't think this is safe since when we eraseFromParent the instruction is deleted. Can we track a different way?

any overlap with D104798?

In D146267#4202607, @aeubanks wrote:

any overlap with D104798?

Yeah, this looks pretty similar. Opting to use the set approach you use there.

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
1399	Updated to just add erased pointers to a set and check on future iterations if they're in the set. This way we don't have to deref them.

aeubanks added inline comments.Mar 21 2023, 4:48 PM

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
1399	if the instruction is erased, can there be instructions created after that share the same pointer?

leonardchan marked an inline comment as not done.Mar 21 2023, 5:07 PM

leonardchan added inline comments.

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
1399	Hmm, I would think yes, but I think for this specific case it doesn't matter since none of the newly created instructions in this loop get added back to `CSInfo.CallSites` so each of the CBs should only refer to CBs that were added earlier.

Harbormaster completed remote builds in B220883: Diff 507179.Mar 21 2023, 6:00 PM

leonardchan added a reviewer: phosek.Mar 23 2023, 11:48 AM

tejohnson added inline comments.Mar 23 2023, 1:41 PM

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
1399	While I think this would work, given what you mentioned above, the following is I think preferable from a safety/clarity perspective: Change CallBases to a map from the orig CallBase to the new one. Keep this bit of code the same so it skips any repeats. After the loop walk the map and do all of the replacing / erasing. Would that work?

leonardchan updated this revision to Diff 507883.Mar 23 2023, 2:22 PM

leonardchan added inline comments.

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
1399	Should be fine. Updated.

Lgtm. Patch description looks like it needs an update before you push.

This revision is now accepted and ready to land.Mar 23 2023, 2:34 PM

leonardchan edited the summary of this revision. (Show Details)Mar 23 2023, 2:42 PM

This revision was landed with ongoing or failed builds.Mar 23 2023, 2:48 PM

Closed by commit rG53a917595186: [llvm] Handle duplicate call bases when applying branch funneling (authored by leonardchan). · Explain Why

This revision was automatically updated to reflect the committed changes.

leonardchan added a commit: rG53a917595186: [llvm] Handle duplicate call bases when applying branch funneling.

Harbormaster completed remote builds in B221420: Diff 507883.Mar 23 2023, 3:14 PM

Revision Contents

Path

Size

llvm/

lib/

Transforms/

IPO/

WholeProgramDevirt.cpp

19 lines

test/

Transforms/

WholeProgramDevirt/

branch-funnel.ll

48 lines

Diff 507890

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp

Show First 20 Lines • Show All 1,385 Lines • ▼ Show 20 Lines

void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,		void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
Constant *JT, bool &IsExported) {		Constant *JT, bool &IsExported) {
auto Apply = [&](CallSiteInfo &CSInfo) {		auto Apply = [&](CallSiteInfo &CSInfo) {
if (CSInfo.isExported())		if (CSInfo.isExported())
IsExported = true;		IsExported = true;
if (CSInfo.AllCallSitesDevirted)		if (CSInfo.AllCallSitesDevirted)
return;		return;

		std::map<CallBase *, CallBase *> CallBases;
for (auto &&VCallSite : CSInfo.CallSites) {		for (auto &&VCallSite : CSInfo.CallSites) {
CallBase &CB = VCallSite.CB;		CallBase &CB = VCallSite.CB;

		if (CallBases.find(&CB) != CallBases.end()) {
		tejohnsonUnsubmitted Not Done Reply Inline Actions I don't think this is safe since when we eraseFromParent the instruction is deleted. Can we track a different way? tejohnson: I don't think this is safe since when we eraseFromParent the instruction is deleted. Can we…
		leonardchanAuthorUnsubmitted Not Done Reply Inline Actions Updated to just add erased pointers to a set and check on future iterations if they're in the set. This way we don't have to deref them. leonardchan: Updated to just add erased pointers to a set and check on future iterations if they're in the…
		aeubanksUnsubmitted Not Done Reply Inline Actions if the instruction is erased, can there be instructions created after that share the same pointer? aeubanks: if the instruction is erased, can there be instructions created after that share the same…
		leonardchanAuthorUnsubmitted Not Done Reply Inline Actions Hmm, I would think yes, but I think for this specific case it doesn't matter since none of the newly created instructions in this loop get added back to `CSInfo.CallSites` so each of the CBs should only refer to CBs that were added earlier. leonardchan: Hmm, I would think yes, but I think for this specific case it doesn't matter since none of the…
		tejohnsonUnsubmitted Not Done Reply Inline Actions While I think this would work, given what you mentioned above, the following is I think preferable from a safety/clarity perspective: Change CallBases to a map from the orig CallBase to the new one. Keep this bit of code the same so it skips any repeats. After the loop walk the map and do all of the replacing / erasing. Would that work? tejohnson: While I think this would work, given what you mentioned above, the following is I think…
		leonardchanAuthorUnsubmitted Done Reply Inline Actions Should be fine. Updated. leonardchan: Should be fine. Updated.
		// When finding devirtualizable calls, it's possible to find the same
		// vtable passed to multiple llvm.type.test or llvm.type.checked.load
		// calls, which can cause duplicate call sites to be recorded in
		// [Const]CallSites. If we've already found one of these
		// call instances, just ignore it. It will be replaced later.
		continue;
		}

// Jump tables are only profitable if the retpoline mitigation is enabled.		// Jump tables are only profitable if the retpoline mitigation is enabled.
Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");		Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
if (!FSAttr.isValid() ||		if (!FSAttr.isValid() ||
!FSAttr.getValueAsString().contains("+retpoline"))		!FSAttr.getValueAsString().contains("+retpoline"))
continue;		continue;

NumBranchFunnel++;		NumBranchFunnel++;
if (RemarksEnabled)		if (RemarksEnabled)
Show All 30 Lines	for (auto &&VCallSite : CSInfo.CallSites) {
M.getContext(), ArrayRef<Attribute>{Attribute::get(		M.getContext(), ArrayRef<Attribute>{Attribute::get(
M.getContext(), Attribute::Nest)}));		M.getContext(), Attribute::Nest)}));
for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)		for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
NewArgAttrs.push_back(Attrs.getParamAttrs(I));		NewArgAttrs.push_back(Attrs.getParamAttrs(I));
NewCS->setAttributes(		NewCS->setAttributes(
AttributeList::get(M.getContext(), Attrs.getFnAttrs(),		AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
Attrs.getRetAttrs(), NewArgAttrs));		Attrs.getRetAttrs(), NewArgAttrs));

CB.replaceAllUsesWith(NewCS);		CallBases[&CB] = NewCS;
CB.eraseFromParent();

// This use is no longer unsafe.		// This use is no longer unsafe.
if (VCallSite.NumUnsafeUses)		if (VCallSite.NumUnsafeUses)
--*VCallSite.NumUnsafeUses;		--*VCallSite.NumUnsafeUses;
}		}
// Don't mark as devirtualized because there may be callers compiled without		// Don't mark as devirtualized because there may be callers compiled without
// retpoline mitigation, which would mean that they are lowered to		// retpoline mitigation, which would mean that they are lowered to
// llvm.type.test and therefore require an llvm.type.test resolution for the		// llvm.type.test and therefore require an llvm.type.test resolution for the
// type identifier.		// type identifier.

		std::for_each(CallBases.begin(), CallBases.end(), [](auto &CBs) {
		CBs.first->replaceAllUsesWith(CBs.second);
		CBs.first->eraseFromParent();
		});
};		};
Apply(SlotInfo.CSInfo);		Apply(SlotInfo.CSInfo);
for (auto &P : SlotInfo.ConstCSInfo)		for (auto &P : SlotInfo.ConstCSInfo)
Apply(P.second);		Apply(P.second);
}		}

bool DevirtModule::tryEvaluateFunctionsWithArgs(		bool DevirtModule::tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,		MutableArrayRef<VirtualCallTarget> TargetsForSlot,
▲ Show 20 Lines • Show All 965 Lines • Show Last 20 Lines

llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll

Show First 20 Lines • Show All 227 Lines • ▼ Show 20 Lines	define i32 @fn3_rv(ptr %obj) #0 {
call void @llvm.assume(i1 %p)		call void @llvm.assume(i1 %p)
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)		%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
; RETP: call i32 @branch_funnel.1(ptr		; RETP: call i32 @branch_funnel.1(ptr
; NORETP: call i32 %		; NORETP: call i32 %
%result = call i32 %fptr(ptr %obj, i32 1)		%result = call i32 %fptr(ptr %obj, i32 1)
ret i32 %result		ret i32 %result
}		}

		; CHECK-LABEL: define i32 @fn4
		; CHECK-NOT: call void (...) @llvm.icall.branch.funnel
		define i32 @fn4(ptr %obj) #0 {
		%p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1")
		call void @llvm.assume(i1 %p)
		%fptr = load ptr, ptr @vt1_1
		; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1)
		%result = call i32 %fptr(ptr %obj, i32 1)
		; NORETP: call i32 %
		ret i32 %result
		}

		; CHECK-LABEL: define i32 @fn4_cpy
		; CHECK-NOT: call void (...) @llvm.icall.branch.funnel
		define i32 @fn4_cpy(ptr %obj) #0 {
		%p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1")
		call void @llvm.assume(i1 %p)
		%fptr = load ptr, ptr @vt1_1
		; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1)
		%result = call i32 %fptr(ptr %obj, i32 1)
		; NORETP: call i32 %
		ret i32 %result
		}

		; CHECK-LABEL: define i32 @fn4_rv
		; CHECK-NOT: call void (...) @llvm.icall.branch.funnel
		define i32 @fn4_rv(ptr %obj) #0 {
		%p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv")
		call void @llvm.assume(i1 %p)
		%fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0)
		; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1)
		%result = call i32 %fptr(ptr %obj, i32 1)
		; NORETP: call i32 %
		ret i32 %result
		}

		; CHECK-LABEL: define i32 @fn4_rv_cpy
		; CHECK-NOT: call void (...) @llvm.icall.branch.funnel
		define i32 @fn4_rv_cpy(ptr %obj) #0 {
		%p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv")
		call void @llvm.assume(i1 %p)
		%fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0)
		; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1)
		%result = call i32 %fptr(ptr %obj, i32 1)
		; NORETP: call i32 %
		ret i32 %result
		}

; CHECK-LABEL: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...)		; CHECK-LABEL: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...)
; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2, ptr {{(nonnull )?}}@vf1_2, ...)		; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2, ptr {{(nonnull )?}}@vf1_2, ...)

; CHECK-LABEL: define hidden void @__typeid_typeid1_rv_0_branch_funnel(ptr nest %0, ...)		; CHECK-LABEL: define hidden void @__typeid_typeid1_rv_0_branch_funnel(ptr nest %0, ...)
; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1_rv, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2_rv, ptr {{(nonnull )?}}@vf1_2, ...)		; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1_rv, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2_rv, ptr {{(nonnull )?}}@vf1_2, ...)

; CHECK: define internal void @branch_funnel(ptr		; CHECK: define internal void @branch_funnel(ptr
; CHECK: define internal void @branch_funnel.1(ptr		; CHECK: define internal void @branch_funnel.1(ptr
Show All 16 Lines