This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
lib/CodeGen/
-
CodeGen/
-
PPCGCodeGeneration.cpp

Differential D36867

[GPGPU] Synchronize after each kernel, not each copy out
ClosedPublic

Authored by grosser on Aug 18 2017, 3:49 AM.

Download Raw Diff

Details

Reviewers

Meinersbur
bollu
singam-sanjay

Commits

rG62acb344d08d: [GPGPU] Synchronize after each kernel, not each copy out
rPLO311155: [GPGPU] Synchronize after each kernel, not each copy out
rL311155: [GPGPU] Synchronize after each kernel, not each copy out

Summary

This change reduces the overall number of synchronize calls for kernels with
a lot of output data at the cost of additional synchronize calls for kernels
launched in sequence without any device to host transfers in between. As the
latter pattern is a lot less frequent, this seems a better tradeoff.

Although the above would be motivation enough on its own, this change is just
a step towards enabling ppcg to not compute to-device and from-device copy calls
at all, which would be incorrect if we still relied on these calls to
place our synchronization statements.

Diff Detail

Build Status

Buildable 9398
Build 9398: arc lint + arc unit

Event Timeline

grosser created this revision. · Aug 18 2017, 3:49 AM

Herald added subscribers: kbarton, nemanjai. · View Herald Transcript · Aug 18 2017, 3:49 AM

Harbormaster completed remote builds in B9398: Diff 111649. · Aug 18 2017, 3:49 AM

Does this patch depend on some other patch? If so, please record this information by creating a Parent Revision (Edit Related Revisions -> Edit Parent Revision). If some other patch depends on this one, then make this a parent of that patch. This helps when looking up revisions later on.

Other than that, LGTM.

This revision is now accepted and ready to land. · Aug 18 2017, 4:53 AM

grosser added a child revision: D36868: [GPGPU] Do not create copy statements when targetting managed memory. · Aug 18 2017, 5:56 AM

Closed by commit rL311155: [GPGPU] Synchronize after each kernel, not each copy out (authored by grosser). · Explain Why · Aug 18 2017, 5:56 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

CodeGen/

PPCGCodeGeneration.cpp

3 lines

Diff 111649

lib/CodeGen/PPCGCodeGeneration.cpp

Show First 20 Lines • Show All 1,213 Lines • ▼ Show 20 Lines	void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) {
isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);		isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
isl_id *Id = isl_ast_expr_get_id(StmtExpr);		isl_id *Id = isl_ast_expr_get_id(StmtExpr);
isl_id_free(Id);		isl_id_free(Id);
isl_ast_expr_free(StmtExpr);		isl_ast_expr_free(StmtExpr);

const char *Str = isl_id_get_name(Id);		const char *Str = isl_id_get_name(Id);
if (!strcmp(Str, "kernel")) {		if (!strcmp(Str, "kernel")) {
createKernel(UserStmt);		createKernel(UserStmt);
		if (PollyManagedMemory)
		createCallSynchronizeDevice();
isl_ast_expr_free(Expr);		isl_ast_expr_free(Expr);
return;		return;
}		}
if (!strcmp(Str, "init_device")) {		if (!strcmp(Str, "init_device")) {
initializeAfterRTH();		initializeAfterRTH();
isl_ast_node_free(UserStmt);		isl_ast_node_free(UserStmt);
isl_ast_expr_free(Expr);		isl_ast_expr_free(Expr);
return;		return;
Show All 13 Lines	if (isPrefix(Str, "to_device")) {
isl_ast_expr_free(Expr);		isl_ast_expr_free(Expr);
return;		return;
}		}

if (isPrefix(Str, "from_device")) {		if (isPrefix(Str, "from_device")) {
if (!PollyManagedMemory) {		if (!PollyManagedMemory) {
createDataTransfer(UserStmt, DEVICE_TO_HOST);		createDataTransfer(UserStmt, DEVICE_TO_HOST);
} else {		} else {
createCallSynchronizeDevice();
isl_ast_node_free(UserStmt);		isl_ast_node_free(UserStmt);
}		}
isl_ast_expr_free(Expr);		isl_ast_expr_free(Expr);
return;		return;
}		}

isl_id *Anno = isl_ast_node_get_annotation(UserStmt);		isl_id *Anno = isl_ast_node_get_annotation(UserStmt);
struct ppcg_kernel_stmt *KernelStmt =		struct ppcg_kernel_stmt *KernelStmt =
▲ Show 20 Lines • Show All 2,255 Lines • Show Last 20 Lines