This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Transforms/Utils/
-
Transforms/
-
Utils/
-
CodeExtractor.cpp
-
unittests/Transforms/Utils/
-
Transforms/
-
Utils/
1/1
CodeExtractorTest.cpp

Differential D90689

[CodeExtractor] Replace uses of extracted bitcasts in out-of-region lifetime markers
ClosedPublic

Authored by ggeorgakoudis on Nov 3 2020, 8:37 AM.

Download Raw Diff

Details

Reviewers

jdoerfert
vsk

Commits

rG700d2417d828: [CodeExtractor] Replace uses of extracted bitcasts in out-of-region lifetime…

Summary

CodeExtractor treats bitcasts in the extracted region that have
lifetime-marker users in the outer region as outputs. That
creates unnecessary alloca/reload instructions and extra lifetime
markers. The patch identifies those cases and replaces the uses in
out-of-region lifetime markers with new bitcasts in the outer region.

Example

define void @foo() {
entry:
  %0 = alloca i32
  br label %extract

extract:
  %1 = bitcast i32* %0 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* %1)
  call void @use(i32* %0)
  br label %exit

exit:
  call void @use(i32* %0)
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %1)
  ret void
}

Current extraction

define void @foo() {
entry:
  %.loc = alloca i8*, align 8
  %0 = alloca i32, align 4
  br label %codeRepl

codeRepl:                                         ; preds = %entry
  %lt.cast = bitcast i8** %.loc to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast)
  %lt.cast1 = bitcast i32* %0 to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast1)
  call void @foo.extract(i32* %0, i8** %.loc)
  %.reload = load i8*, i8** %.loc, align 8
  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast)
  br label %exit

exit:                                             ; preds = %codeRepl
  call void @use(i32* %0)
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %.reload)
  ret void
}

define internal void @foo.extract(i32* %0, i8** %.out) {
newFuncRoot:
  br label %extract

exit.exitStub:                                    ; preds = %extract
  ret void

extract:                                          ; preds = %newFuncRoot
  %1 = bitcast i32* %0 to i8*
  store i8* %1, i8** %.out, align 8
  call void @use(i32* %0)
  br label %exit.exitStub
}

Extraction with patch

define void @foo() {
entry:
  %0 = alloca i32, align 4
  br label %codeRepl

codeRepl:                                         ; preds = %entry
  %lt.cast1 = bitcast i32* %0 to i8*
  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast1)
  call void @foo.extract(i32* %0)
  br label %exit

exit:                                             ; preds = %codeRepl
  call void @use(i32* %0)
  %lt.cast = bitcast i32* %0 to i8*
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %lt.cast)
  ret void
}

define internal void @foo.extract(i32* %0) {
newFuncRoot:
  br label %extract

exit.exitStub:                                    ; preds = %extract
  ret void

extract:                                          ; preds = %newFuncRoot
  %1 = bitcast i32* %0 to i8*
  call void @use(i32* %0)
  br label %exit.exitStub
}

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

ggeorgakoudis created this revision. · Nov 3 2020, 8:37 AM

Herald added a project: Restricted Project. · View Herald Transcript · Nov 3 2020, 8:37 AM

Herald added subscribers: llvm-commits, hiraditya. · View Herald Transcript

ggeorgakoudis requested review of this revision. · Nov 3 2020, 8:37 AM

ggeorgakoudis added a reviewer: jdoerfert. · Nov 3 2020, 8:38 AM

Format the patch message better

ggeorgakoudis edited the summary of this revision. (Show Details) · Nov 3 2020, 8:56 AM

ggeorgakoudis added a reviewer: vsk. · Nov 3 2020, 9:01 AM

sstefan1 added a subscriber: sstefan1. · Nov 3 2020, 9:30 AM

Harbormaster completed remote builds in B77418: Diff 302598. · Nov 3 2020, 9:41 AM

Harbormaster completed remote builds in B77416: Diff 302594.

In the example, what pass/component is responsible for creating %1 = bitcast i32* %0 to i8*?

If it's CodeExtractor itself, it would be nice to solve the problem earlier by getting rid of bitcasts which only exist to wire up lifetime markers. I tried doing that in 26ee8aff2, but had to revert (099bffe7f) because llvm doesn't support specializing its lifetime intrinsics on opaque pointer types. That's a bug, I think: if we fix that, perhaps this whole problem goes away.

OTOH, if this %1 = bitcast i32* %0 to i8* value comes from a frontend or another pass, I think the approach taken in this patch is reasonable. Please add a test (perhaps in the CodeExtractor unittest).

In D90689#2371522, @vsk wrote:

In the example, what pass/component is responsible for creating %1 = bitcast i32* %0 to i8*?

If it's CodeExtractor itself, it would be nice to solve the problem earlier by getting rid of bitcasts which only exist to wire up lifetime markers. I tried doing that in 26ee8aff2, but had to revert (099bffe7f) because llvm doesn't support specializing its lifetime intrinsics on opaque pointer types. That's a bug, I think: if we fix that, perhaps this whole problem goes away.

OTOH, if this %1 = bitcast i32* %0 to i8* value comes from a frontend or another pass, I think the approach taken in this patch is reasonable. Please add a test (perhaps in the CodeExtractor unittest).

Many thanks for the quick reply @vsk! The bitcast is generated by the frontend, not by CodeExtractor. Our use case is outlining a merged parallel region that includes sequential code, which in turn includes bitcasts like that. I am going to add a unit test.

I have two questions about the CodeExtractor.

1. I have second thoughts on changing the IR within findAllocas. Should I create a helper data structure, e.g., ReplaceBitcastUses, as is done for SinkCands, and do the transformation inside the extractCodeRegion caller?
2. Is the assumption that findAllocas is always called before findInputsOutputs? If findAllocas is not called, my concern is that findInputsOutputs will report an output on the use of the bitcast although this output would be removed if findAllocas had been called. This problem does not appear for extractCodeRegion because it always calls findAllocas, but there may be other modules using the CodeExtractor interface. I could change findInputsOutputs not to include bitcast uses in lifetime markers as outputs regardless of whether findAllocas has been called. Is that reasonable or does that create other problems?

Thanks again!

In D90689#2371689, @ggeorgakoudis wrote:

In D90689#2371522, @vsk wrote:

In the example, what pass/component is responsible for creating %1 = bitcast i32* %0 to i8*?

If it's CodeExtractor itself, it would be nice to solve the problem earlier by getting rid of bitcasts which only exist to wire up lifetime markers. I tried doing that in 26ee8aff2, but had to revert (099bffe7f) because llvm doesn't support specializing its lifetime intrinsics on opaque pointer types. That's a bug, I think: if we fix that, perhaps this whole problem goes away.

OTOH, if this %1 = bitcast i32* %0 to i8* value comes from a frontend or another pass, I think the approach taken in this patch is reasonable. Please add a test (perhaps in the CodeExtractor unittest).

Many thanks for the quick reply @vsk! The bitcast is generated by the frontend, not by CodeExtractor. Our use case is outlining a merged parallel region that includes sequential code, which in turn includes bitcasts like that. I am going to add a unit test.

I have two questions about the CodeExtractor.

1. I have second thoughts on changing the IR within findAllocas. Should I create a helper data structure, e.g., ReplaceBitcastUses, as is done for SinkCands, and do the transformation inside the extractCodeRegion caller?

2. Is the assumption that findAllocas is always called before findInputsOutputs? If findAllocas is not called, my concern is that findInputsOutputs will report an output on the use of the bitcast although this output would be removed if findAllocas had been called. This problem does not appear for extractCodeRegion because it always calls findAllocas, but there may be other modules using the CodeExtractor interface. I could change findInputsOutputs not to include bitcast uses in lifetime markers as outputs regardless of whether findAllocas has been called. Is that reasonable or does that create other problems?

The status quo is that findAllocas destructively updates the IR in ways that affect the results of findInputsOutputs: you're correct that this dependency can create issues. IMHO, CodeExtractor intermixes analysis and transformation too much. This makes it hard to improve its transformations (as evidenced by your question (1)); it also makes it hard for client code to analyze extraction profitability, since you need to do the extraction to figure out what the real input/output set will be. (Before extractCodeRegion, you can query findInputsOutputs, but it's just guessing.)

Taking a short-term view, having findAllocas perform a new destructive update would get the job done and be in keeping with the current design. In the long term, I think it'd be nice to have a redesigned CodeExtractor that keeps analysis and transformation neatly separated. (That said, I'm not sure how best to achieve that, and I'm no longer actively working on it.)

Add unit test

I see your point @vsk. I have added a unit test. Anything else missing for acceptance?

Harbormaster completed remote builds in B77739: Diff 303182. · Nov 5 2020, 12:51 PM

Thanks, lgtm.

llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
327	nit, it's a bit more idiomatic to write `EXPECT_EQ(outputs.size(), 0U)`.

This revision is now accepted and ready to land. · Nov 5 2020, 2:01 PM

ggeorgakoudis marked an inline comment as done. · Nov 5 2020, 4:22 PM

This revision was landed with ongoing or failed builds. · Nov 5 2020, 5:01 PM

Closed by commit rG700d2417d828: [CodeExtractor] Replace uses of extracted bitcasts in out-of-region lifetime… (authored by ggeorgakoudis). · Explain Why

This revision was automatically updated to reflect the committed changes.

ggeorgakoudis added a commit: rG700d2417d828: [CodeExtractor] Replace uses of extracted bitcasts in out-of-region lifetime….

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Utils/

CodeExtractor.cpp

40 lines

unittests/

Transforms/

Utils/

CodeExtractorTest.cpp

49 lines

Diff 303301

llvm/lib/Transforms/Utils/CodeExtractor.cpp

Show First 20 Lines • Show All 529 Lines • ▼ Show 20 Lines	for (AllocaInst *AI : CEAC.getAllocas()) {
LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);		LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);		bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
if (Moved) {		if (Moved) {
LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");		LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
SinkCands.insert(AI);		SinkCands.insert(AI);
continue;		continue;
}		}

		// Find bitcasts in the outlined region that have lifetime marker users
		// outside that region. Replace the lifetime marker use with an
		// outside region bitcast to avoid unnecessary alloca/reload instructions
		// and extra lifetime markers.
		SmallVector<Instruction *, 2> LifetimeBitcastUsers;
		for (User *U : AI->users()) {
		if (!definedInRegion(Blocks, U))
		continue;

		if (U->stripInBoundsConstantOffsets() != AI)
		continue;

		Instruction *Bitcast = cast<Instruction>(U);
		for (User *BU : Bitcast->users()) {
		IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU);
		if (!IntrInst)
		continue;

		if (!IntrInst->isLifetimeStartOrEnd())
		continue;

		if (definedInRegion(Blocks, IntrInst))
		continue;

		LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
		<< *Bitcast << " in out-of-region lifetime marker "
		<< *IntrInst << "\n");
		LifetimeBitcastUsers.push_back(IntrInst);
		}
		}

		for (Instruction *I : LifetimeBitcastUsers) {
		Module *M = AIFunc->getParent();
		LLVMContext &Ctx = M->getContext();
		auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
		CastInst *CastI =
		CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
		I->replaceUsesOfWith(I->getOperand(1), CastI);
		}

// Follow any bitcasts.		// Follow any bitcasts.
SmallVector<Instruction *, 2> Bitcasts;		SmallVector<Instruction *, 2> Bitcasts;
SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;		SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
for (User *U : AI->users()) {		for (User *U : AI->users()) {
if (U->stripInBoundsConstantOffsets() == AI) {		if (U->stripInBoundsConstantOffsets() == AI) {
Instruction *Bitcast = cast<Instruction>(U);		Instruction *Bitcast = cast<Instruction>(U);
LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);		LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
if (LMI.LifeStart) {		if (LMI.LifeStart) {
▲ Show 20 Lines • Show All 1,223 Lines • Show Last 20 Lines

llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp

Show First 20 Lines • Show All 276 Lines • ▼ Show 20 Lines	TEST(CodeExtractor, ExtractAndInvalidateAssumptionCache) {

CodeExtractorAnalysisCache CEAC(*Func);		CodeExtractorAnalysisCache CEAC(*Func);
Function *Outlined = CE.extractCodeRegion(CEAC);		Function *Outlined = CE.extractCodeRegion(CEAC);
EXPECT_TRUE(Outlined);		EXPECT_TRUE(Outlined);
EXPECT_FALSE(verifyFunction(*Outlined));		EXPECT_FALSE(verifyFunction(*Outlined));
EXPECT_FALSE(verifyFunction(*Func));		EXPECT_FALSE(verifyFunction(*Func));
EXPECT_FALSE(CE.verifyAssumptionCache(Func, Outlined, &AC));		EXPECT_FALSE(CE.verifyAssumptionCache(Func, Outlined, &AC));
}		}

		TEST(CodeExtractor, RemoveBitcastUsesFromOuterLifetimeMarkers) {
		LLVMContext Ctx;
		SMDiagnostic Err;
		std::unique_ptr<Module> M(parseAssemblyString(R"ir(
		target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
		target triple = "x86_64-unknown-linux-gnu"

		declare void @use(i32*)
		declare void @llvm.lifetime.start.p0i8(i64, i8*)
		declare void @llvm.lifetime.end.p0i8(i64, i8*)

		define void @foo() {
		entry:
		%0 = alloca i32
		br label %extract

		extract:
		%1 = bitcast i32* %0 to i8*
		call void @llvm.lifetime.start.p0i8(i64 4, i8* %1)
		call void @use(i32* %0)
		br label %exit

		exit:
		call void @use(i32* %0)
		call void @llvm.lifetime.end.p0i8(i64 4, i8* %1)
		ret void
		}
		)ir",
		Err, Ctx));

		Function *Func = M->getFunction("foo");
		SmallVector<BasicBlock *, 1> Blocks{getBlockByName(Func, "extract")};

		CodeExtractor CE(Blocks);
		EXPECT_TRUE(CE.isEligible());

		CodeExtractorAnalysisCache CEAC(*Func);
		SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
		BasicBlock *CommonExit = nullptr;
		CE.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
		CE.findInputsOutputs(Inputs, Outputs, SinkingCands);
		EXPECT_EQ(Outputs.size(), 0U);
		Inline comment by vsk (Unsubmitted, Done): nit, it's a bit more idiomatic to write `EXPECT_EQ(outputs.size(), 0U)`.

		Function *Outlined = CE.extractCodeRegion(CEAC);
		EXPECT_TRUE(Outlined);
		EXPECT_FALSE(verifyFunction(*Outlined));
		EXPECT_FALSE(verifyFunction(*Func));
		}
} // end anonymous namespace		} // end anonymous namespace