This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
clang/lib/Driver/
-
lib/
-
Driver/
-
Driver.cpp

Differential D69124

[clang][driver] Print compilation phases with indentation.
ClosedPublic

Authored by hliao on Oct 17 2019, 11:06 AM.

Download Raw Diff

Details

Reviewers

tra
sfantao
echristo

Commits

rGd7a487adfedb: [clang][driver] Print compilation phases with indentation.
rC375310: [clang][driver] Print compilation phases with indentation.
rL375310: [clang][driver] Print compilation phases with indentation.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

hliao created this revision. · Oct 17 2019, 11:06 AM

Herald added a project: Restricted Project. · View Herald Transcript · Oct 17 2019, 11:06 AM

Herald added a subscriber: cfe-commits. · View Herald Transcript

This patch enables dumping the actions as a hierarchy (tree). In most cases, the actions form a linear list, but for offload compilation a tree representation is more intuitive. Even though there are cross-subtree edges, they are rare and are also noted in the corresponding actions.

Harbormaster completed remote builds in B39741: Diff 225472. · Oct 17 2019, 11:11 AM

Could you give an example of before/after output?

In D69124#1713360, @tra wrote:

Could you give an example of before/after output?

$ clang -x cuda -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_60 -c ~/dummy.cpp 
     0: input, "/home/michliao/dummy.cpp", cuda, (host-cuda)
    1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
   2: compiler, {1}, ir, (host-cuda)
         3: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_30)
        4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30)
       5: compiler, {4}, ir, (device-cuda, sm_30)
      6: backend, {5}, assembler, (device-cuda, sm_30)
     7: assembler, {6}, object, (device-cuda, sm_30)
    8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {7}, object
    9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {6}, assembler
         10: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_60)
        11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_60)
       12: compiler, {11}, ir, (device-cuda, sm_60)
      13: backend, {12}, assembler, (device-cuda, sm_60)
     14: assembler, {13}, object, (device-cuda, sm_60)
    15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {14}, object
    16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {13}, assembler
   17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
  18: offload, "host-cuda (x86_64-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
 19: backend, {18}, assembler, (host-cuda)
20: assembler, {19}, object, (host-cuda)

In D69124#1713360, @tra wrote:

Could you give an example of before/after output?

For HIP

$ clang -x hip -ccc-print-phases --cuda-gpu-arch=gfx900 --cuda-gpu-arch=gfx906 -c ~/dummy.cpp 
     0: input, "/home/michliao/dummy.cpp", hip, (host-hip)
    1: preprocessor, {0}, hip-cpp-output, (host-hip)
   2: compiler, {1}, ir, (host-hip)
        3: input, "/home/michliao/dummy.cpp", hip, (device-hip, gfx900)
       4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx900)
      5: compiler, {4}, ir, (device-hip, gfx900)
     6: linker, {5}, image, (device-hip, gfx900)
    7: offload, "device-hip (amdgcn-amd-amdhsa:gfx900)" {6}, image
        8: input, "/home/michliao/dummy.cpp", hip, (device-hip, gfx906)
       9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx906)
      10: compiler, {9}, ir, (device-hip, gfx906)
     11: linker, {10}, image, (device-hip, gfx906)
    12: offload, "device-hip (amdgcn-amd-amdhsa:gfx906)" {11}, image
   13: linker, {7, 12}, hip-fatbin, (device-hip)
  14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (amdgcn-amd-amdhsa)" {13}, ir
 15: backend, {14}, assembler, (host-hip)
16: assembler, {15}, object, (host-hip)

This is... rather oddly-structured output. My brain refuses to accept that the most-indented phase is the input.
Perhaps we should do llvm::errs().indent(MaxIdent-Ident). This should give us something like this (with MaxIdent=9), which is somewhat easier to grok, IMO:

    0: input, "/home/michliao/dummy.cpp", cuda, (host-cuda)
     1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
      2: compiler, {1}, ir, (host-cuda)
3: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_30)
 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30)
  5: compiler, {4}, ir, (device-cuda, sm_30)
   6: backend, {5}, assembler, (device-cuda, sm_30)
    7: assembler, {6}, object, (device-cuda, sm_30)
     8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {7}, object
     9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {6}, assembler
10: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_60)
 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_60)
  12: compiler, {11}, ir, (device-cuda, sm_60)
   13: backend, {12}, assembler, (device-cuda, sm_60)
    14: assembler, {13}, object, (device-cuda, sm_60)
     15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {14}, object
     16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {13}, assembler
      17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
       18: offload, "host-cuda (x86_64-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
        19: backend, {18}, assembler, (host-cuda)
         20: assembler, {19}, object, (host-cuda)

Revised the output by drawing tree lines. Now, the output looks like:

$ clang -x cuda -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_60 -c dummy.cpp
            +- 0: input, "/home/michliao/dummy.cpp", cuda, (host-cuda)
         +- 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
      +- 2: compiler, {1}, ir, (host-cuda)
      |                 +- 3: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_30)
      |              +- 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30)
      |           +- 5: compiler, {4}, ir, (device-cuda, sm_30)
      |        +- 6: backend, {5}, assembler, (device-cuda, sm_30)
      |     +- 7: assembler, {6}, object, (device-cuda, sm_30)
      |  +- 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {7}, object
      |  |- 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {6}, assembler
      |  |              +- 10: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_60)
      |  |           +- 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_60)
      |  |        +- 12: compiler, {11}, ir, (device-cuda, sm_60)
      |  |     +- 13: backend, {12}, assembler, (device-cuda, sm_60)
      |  |  +- 14: assembler, {13}, object, (device-cuda, sm_60)
      |  |- 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {14}, object
      |  |- 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {13}, assembler
      |- 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
   +- 18: offload, "host-cuda (x86_64-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
+- 19: backend, {18}, assembler, (host-cuda)
20: assembler, {19}, object, (host-cuda)

$ clang -x hip -ccc-print-phases --cuda-gpu-arch=gfx900 --cuda-gpu-arch=gfx906 -c dummy.cpp
            +- 0: input, "/home/michliao/dummy.cpp", hip, (host-hip)
         +- 1: preprocessor, {0}, hip-cpp-output, (host-hip)
      +- 2: compiler, {1}, ir, (host-hip)
      |              +- 3: input, "/home/michliao/dummy.cpp", hip, (device-hip, gfx900)
      |           +- 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx900)
      |        +- 5: compiler, {4}, ir, (device-hip, gfx900)
      |     +- 6: linker, {5}, image, (device-hip, gfx900)
      |  +- 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx900)" {6}, image
      |  |           +- 8: input, "/home/michliao/dummy.cpp", hip, (device-hip, gfx906)
      |  |        +- 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx906)
      |  |     +- 10: compiler, {9}, ir, (device-hip, gfx906)
      |  |  +- 11: linker, {10}, image, (device-hip, gfx906)
      |  |- 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx906)" {11}, image
      |- 13: linker, {7, 12}, hip-fatbin, (device-hip)
   +- 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (amdgcn-amd-amdhsa)" {13}, ir
+- 15: backend, {14}, assembler, (host-hip)
16: assembler, {15}, object, (host-hip)

In D69124#1713427, @tra wrote:

    0: input, "/home/michliao/dummy.cpp", cuda, (host-cuda)
     1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
      2: compiler, {1}, ir, (host-cuda)
3: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_30)
 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30)
  5: compiler, {4}, ir, (device-cuda, sm_30)
   6: backend, {5}, assembler, (device-cuda, sm_30)
    7: assembler, {6}, object, (device-cuda, sm_30)
     8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {7}, object
     9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {6}, assembler
10: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_60)
 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_60)
  12: compiler, {11}, ir, (device-cuda, sm_60)
   13: backend, {12}, assembler, (device-cuda, sm_60)
    14: assembler, {13}, object, (device-cuda, sm_60)
     15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {14}, object
     16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {13}, assembler
      17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
       18: offload, "host-cuda (x86_64-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
        19: backend, {18}, assembler, (host-cuda)
         20: assembler, {19}, object, (host-cuda)

As the top-level actions are the last actions to be performed, they should have no indentation.

In D69124#1713427, @tra wrote:

    0: input, "/home/michliao/dummy.cpp", cuda, (host-cuda)
     1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
      2: compiler, {1}, ir, (host-cuda)
3: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_30)
 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30)
  5: compiler, {4}, ir, (device-cuda, sm_30)
   6: backend, {5}, assembler, (device-cuda, sm_30)
    7: assembler, {6}, object, (device-cuda, sm_30)
     8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {7}, object
     9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {6}, assembler
10: input, "/home/michliao/dummy.cpp", cuda, (device-cuda, sm_60)
 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_60)
  12: compiler, {11}, ir, (device-cuda, sm_60)
   13: backend, {12}, assembler, (device-cuda, sm_60)
    14: assembler, {13}, object, (device-cuda, sm_60)
     15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {14}, object
     16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_60)" {13}, assembler
      17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
       18: offload, "host-cuda (x86_64-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
        19: backend, {18}, assembler, (host-cuda)
         20: assembler, {19}, object, (host-cuda)

It's difficult, though, to choose a proper MaxIdent that avoids unnecessary leading whitespace or misaligned output. How about we draw the tree edges in the original output? That makes it much easier for me to identify sibling actions.

Harbormaster completed remote builds in B39778: Diff 225620. · Oct 18 2019, 7:51 AM

Neat. I like the visual cues showing what gets passed on to the next processing stage.

This revision is now accepted and ready to land. · Oct 18 2019, 4:15 PM

Closed by commit rGd7a487adfedb: [clang][driver] Print compilation phases with indentation. (authored by hliao). · Explain Why · Oct 18 2019, 5:19 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

clang/

lib/

Driver/

Driver.cpp

29 lines

Diff 225721

clang/lib/Driver/Driver.cpp

Show First 20 Lines • Show All 1,796 Lines • ▼ Show 20 Lines	if (C.getArgs().hasArg(options::OPT_print_effective_triple)) {
const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));		const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
llvm::outs() << Triple.getTriple() << "\n";		llvm::outs() << Triple.getTriple() << "\n";
return false;		return false;
}		}

return true;		return true;
}		}

		enum {
		TopLevelAction = 0,
		HeadSibAction = 1,
		OtherSibAction = 2,
		};

// Display an action graph human-readably. Action A is the "sink" node		// Display an action graph human-readably. Action A is the "sink" node
// and latest-occuring action. Traversal is in pre-order, visiting the		// and latest-occuring action. Traversal is in pre-order, visiting the
// inputs to each action before printing the action itself.		// inputs to each action before printing the action itself.
static unsigned PrintActions1(const Compilation &C, Action *A,		static unsigned PrintActions1(const Compilation &C, Action *A,
std::map<Action *, unsigned> &Ids) {		std::map<Action *, unsigned> &Ids,
		Twine Indent = {}, int Kind = TopLevelAction) {
if (Ids.count(A)) // A was already visited.		if (Ids.count(A)) // A was already visited.
return Ids[A];		return Ids[A];

std::string str;		std::string str;
llvm::raw_string_ostream os(str);		llvm::raw_string_ostream os(str);

		auto getSibIndent = [](int K) -> Twine {
		return (K == HeadSibAction) ? "   " : (K == OtherSibAction) ? "|  " : "";
		};

		Twine SibIndent = Indent + getSibIndent(Kind);
		int SibKind = HeadSibAction;
os << Action::getClassName(A->getKind()) << ", ";		os << Action::getClassName(A->getKind()) << ", ";
if (InputAction *IA = dyn_cast<InputAction>(A)) {		if (InputAction *IA = dyn_cast<InputAction>(A)) {
os << "\"" << IA->getInputArg().getValue() << "\"";		os << "\"" << IA->getInputArg().getValue() << "\"";
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {		} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
os << '"' << BIA->getArchName() << '"' << ", {"		os << '"' << BIA->getArchName() << '"' << ", {"
<< PrintActions1(C, *BIA->input_begin(), Ids) << "}";		<< PrintActions1(C, *BIA->input_begin(), Ids, SibIndent, SibKind) << "}";
} else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {		} else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
bool IsFirst = true;		bool IsFirst = true;
OA->doOnEachDependence(		OA->doOnEachDependence(
[&](Action *A, const ToolChain *TC, const char *BoundArch) {		[&](Action *A, const ToolChain *TC, const char *BoundArch) {
// E.g. for two CUDA device dependences whose bound arch is sm_20 and		// E.g. for two CUDA device dependences whose bound arch is sm_20 and
// sm_35 this will generate:		// sm_35 this will generate:
// "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"		// "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"
// (nvptx64-nvidia-cuda:sm_35) {#ID}		// (nvptx64-nvidia-cuda:sm_35) {#ID}
if (!IsFirst)		if (!IsFirst)
os << ", ";		os << ", ";
os << '"';		os << '"';
if (TC)		if (TC)
os << A->getOffloadingKindPrefix();		os << A->getOffloadingKindPrefix();
else		else
os << "host";		os << "host";
os << " (";		os << " (";
os << TC->getTriple().normalize();		os << TC->getTriple().normalize();

if (BoundArch)		if (BoundArch)
os << ":" << BoundArch;		os << ":" << BoundArch;
os << ")";		os << ")";
os << '"';		os << '"';
os << " {" << PrintActions1(C, A, Ids) << "}";		os << " {" << PrintActions1(C, A, Ids, SibIndent, SibKind) << "}";
IsFirst = false;		IsFirst = false;
		SibKind = OtherSibAction;
});		});
} else {		} else {
const ActionList *AL = &A->getInputs();		const ActionList *AL = &A->getInputs();

if (AL->size()) {		if (AL->size()) {
const char *Prefix = "{";		const char *Prefix = "{";
for (Action *PreRequisite : *AL) {		for (Action *PreRequisite : *AL) {
os << Prefix << PrintActions1(C, PreRequisite, Ids);		os << Prefix << PrintActions1(C, PreRequisite, Ids, SibIndent, SibKind);
Prefix = ", ";		Prefix = ", ";
		SibKind = OtherSibAction;
}		}
os << "}";		os << "}";
} else		} else
os << "{}";		os << "{}";
}		}

// Append offload info for all options other than the offloading action		// Append offload info for all options other than the offloading action
// itself (e.g. (cuda-device, sm_20) or (cuda-host)).		// itself (e.g. (cuda-device, sm_20) or (cuda-host)).
std::string offload_str;		std::string offload_str;
llvm::raw_string_ostream offload_os(offload_str);		llvm::raw_string_ostream offload_os(offload_str);
if (!isa<OffloadAction>(A)) {		if (!isa<OffloadAction>(A)) {
auto S = A->getOffloadingKindPrefix();		auto S = A->getOffloadingKindPrefix();
if (!S.empty()) {		if (!S.empty()) {
offload_os << ", (" << S;		offload_os << ", (" << S;
if (A->getOffloadingArch())		if (A->getOffloadingArch())
offload_os << ", " << A->getOffloadingArch();		offload_os << ", " << A->getOffloadingArch();
offload_os << ")";		offload_os << ")";
}		}
}		}

		auto getSelfIndent = [](int K) -> Twine {
		return (K == HeadSibAction) ? "+- " : (K == OtherSibAction) ? "|- " : "";
		};

unsigned Id = Ids.size();		unsigned Id = Ids.size();
Ids[A] = Id;		Ids[A] = Id;
llvm::errs() << Id << ": " << os.str() << ", "		llvm::errs() << Indent + getSelfIndent(Kind) << Id << ": " << os.str() << ", "
<< types::getTypeName(A->getType()) << offload_os.str() << "\n";		<< types::getTypeName(A->getType()) << offload_os.str() << "\n";

return Id;		return Id;
}		}

// Print the action graphs in a compilation C.		// Print the action graphs in a compilation C.
// For example "clang -c file1.c file2.c" is composed of two subgraphs.		// For example "clang -c file1.c file2.c" is composed of two subgraphs.
void Driver::PrintActions(const Compilation &C) const {		void Driver::PrintActions(const Compilation &C) const {
▲ Show 20 Lines • Show All 3,055 Lines • Show Last 20 Lines