# openmp/trunk/runtime/tools/summarizeStats.py

1 | #!/usr/bin/python | ||||
---|---|---|---|---|---|

2 | | ||||

3 | import pandas as pd | ||||

4 | import numpy as np | ||||

5 | import re | ||||

6 | import sys | ||||

7 | import os | ||||

8 | import argparse | ||||

9 | import matplotlib | ||||

10 | from matplotlib import pyplot as plt | ||||

11 | from matplotlib.projections.polar import PolarAxes | ||||

12 | from matplotlib.projections import register_projection | ||||

13 | | ||||

"""
Read the stats file produced by the OpenMP runtime
and produce a processed summary.

The original radar_factory code was taken from
matplotlib.org/examples/api/radar_chart.html.
We added support for handling negative values in radar charts.
"""

22 | | ||||

def radar_factory(num_vars, frame='circle'):
    """Register the 'radar' projection and return its axis angles.

    A ``PolarAxes`` subclass named ``'radar'`` with ``num_vars`` evenly
    spaced axes is built and registered with matplotlib, so that a later
    ``fig.add_axes(..., projection='radar')`` creates a radar chart.

    Args:
        num_vars: Number of axes (spokes) of the chart.
        frame: Shape of the surrounding frame, 'circle' or 'polygon'.

    Returns:
        A numpy array holding the angle (in radians) of every axis.

    Raises:
        ValueError: If ``frame`` is not one of the supported shapes.
    """
    # Evenly spaced axis angles covering [0, 2*pi).
    theta = 2*np.pi * np.linspace(0, 1-1./num_vars, num_vars)
    # The first axis stays at angle 0: the rotation by pi/2 that would put
    # it at the top of the chart is disabled in this version.

    def draw_poly_frame(self, x0, y0, r):
        # TODO: use transforms to convert (x, y) to (r, theta)
        verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_frame(self, x0, y0, r):
        return plt.Circle((x0, y0), r)

    frame_dict = {'polygon': draw_poly_frame, 'circle': draw_circle_frame}
    if frame not in frame_dict:
        # Call syntax works on both Python 2 and Python 3.
        raise ValueError('unknown value for `frame`: %s' % frame)

    class RadarAxes(PolarAxes):
        """
        Class for creating a radar chart (a.k.a. a spider or star chart)

        http://en.wikipedia.org/wiki/Radar_chart
        """
        name = 'radar'
        # use 1 line segment to connect specified points
        RESOLUTION = 1
        # frame drawing function selected by the `frame` argument
        draw_frame = frame_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(*args, closed=closed, **kwargs)

        def plot(self, *args, **kwargs):
            """Plot through PolarAxes.plot and return the created lines.

            The lines are not closed here: the closing step of the
            upstream example is disabled in this version.
            """
            lines = super(RadarAxes, self).plot(*args, **kwargs)
            # Hand the lines back like the parent does (they used to be
            # silently dropped, so callers received None).
            return lines

        def set_varlabels(self, labels):
            """Label every axis, converting the angles to degrees."""
            self.set_thetagrids(theta * 180/np.pi, labels, fontsize=14)

        def _gen_axes_patch(self):
            # Frame centred in the axes with radius 0.5.
            x0, y0 = (0.5, 0.5)
            r = 0.5
            return self.draw_frame(x0, y0, r)

    register_projection(RadarAxes)
    return theta

75 | | ||||

76 | # Code to read the raw stats | ||||

def extractSI(s):
    """Turn text such as "1.5 k" into a float scaled by its SI prefix.

    The text is split on whitespace.  When it consists of exactly two
    tokens, the first character of the second token is taken as the SI
    prefix; otherwise the number is returned unscaled.  An unknown prefix
    raises KeyError.
    """
    tokens = s.split()
    value = float(tokens[0])
    prefix = tokens[1][0] if len(tokens) == 2 else ' '
    # http://physics.nist.gov/cuu/Units/prefixes.html
    multipliers = {
        'Y': 1e24,
        'Z': 1e21,
        'E': 1e18,
        'P': 1e15,
        'T': 1e12,
        'G': 1e9,
        'M': 1e6,
        'k': 1e3,
        ' ': 1,
    }
    divisors = {
        'm': 1e3,
        'u': 1e6,
        'n': 1e9,
        'p': 1e12,
        'f': 1e15,
        'a': 1e18,
        'z': 1e21,
        'y': 1e24,
    }
    # Sub-unit prefixes are applied with a single division instead of a
    # multiplication by 1/x: 1/10 is not exactly representable in IEEE
    # floating point, so the reciprocal would add a second rounding.
    if prefix in divisors:
        return value / divisors[prefix]
    return value * multipliers[prefix]

106 | | ||||

def readData(f):
    """Read one comma-separated statistics table from the open file f.

    The first line read is the header row.  Data rows follow, up to the
    first blank line or the end of the file; rows starting with '#' are
    skipped.  The file is left positioned just after the terminating
    line, so a following table can be read with another call.

    Returns:
        A pandas DataFrame with one row per data row, indexed by the
        row's first field, whose columns are the header names after the
        first one.  Values are converted with extractSI().
    """
    header = f.readline()
    fieldnames = [x.strip() for x in header.split(',')]
    names = []
    rows = []
    line = f.readline().strip()
    while line != "":
        if line[0] != '#':
            fields = line.split(',')
            names.append(fields[0].strip())
            rows.append([extractSI(v) for v in fields[1:]])
        line = f.readline().strip()
    # DataFrame.from_items(), used here originally, was deprecated in
    # pandas 0.23 and removed in 1.0.  The plain constructor builds the
    # same row-oriented frame on every pandas version.
    return pd.DataFrame(rows, index=names, columns=fieldnames[1:])

122 | | ||||

def readTimers(f):
    """Skip the preamble of the stats file f and read the timer table.

    Leading lines starting with '#' are skipped, then the first other
    line and the line after it are discarded (presumably the
    "Statistics on exit" / "Aggregate for all threads" headings), and
    the table that follows is read with readData().
    """
    line = f.readline()
    # startswith() is False for the empty string returned at end-of-file,
    # so an empty or comment-only file no longer raises IndexError here.
    while line.startswith('#'):
        line = f.readline()
    # The original guarded the next read with
    #   line == "Statistics on exit\n" or "Aggregate for all threads\n"
    # which is always true (a non-empty string is truthy), so the line
    # after the first heading has always been discarded.  That behaviour
    # is kept, but made explicit.
    f.readline()
    return readData(f)

132 | | ||||

def readCounters(f):
    """Read the counter table; it is parsed exactly like the timer table."""
    counters = readData(f)
    return counters

136 | | ||||

def readFile(fname):
    """Read the statistics stored in the file named fname.

    Returns:
        A dict with the keys "timers" and "counters", holding the results
        of readTimers() and readCounters() in that order, or None when
        the file cannot be opened or read (a message is printed).
    """
    res = {}
    try:
        with open(fname) as f:
            # The timer table comes first in the file, then the counters.
            res["timers"] = readTimers(f)
            res["counters"] = readCounters(f)
            return res
    except (OSError, IOError):
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("Cannot open " + fname)
        return None

148 | | ||||

def usefulValues(l):
    """Return a list of flags, true where a value is neither null nor zero."""
    present = pd.notnull(l)
    nonzero = l != 0.0
    flags = []
    for is_present, is_nonzero in zip(present, nonzero):
        flags.append(is_present and is_nonzero)
    return flags

152 | | ||||

def uselessValues(l):
    """Return a list of flags, True where a value is null or zero."""
    flags = []
    for useful in usefulValues(l):
        flags.append(not useful)
    return flags

156 | | ||||

# Kinds of statistics that get summarized, in processing order.
interestingStats = ("counters", "timers")

# For every kind of statistic: (y-axis label, chart title).
statProperties = {
    "counters": ("Count", "Counter Statistics"),
    "timers": ("Time (ticks)", "Timer Statistics"),
}

161 | | ||||

def drawChart(data, kind, filebase):
    """Save a log-scale bar chart of data["Mean"] with data["SD"] error bars.

    The axis label and title come from statProperties[kind]; the chart is
    written to the file named filebase + "_" + kind.
    """
    means = data["Mean"]
    deviations = data["SD"]
    means.plot(kind="bar", logy=True, grid=True, colormap="GnBu",
               yerr=deviations, ecolor="black")
    plt.xlabel("OMP Constructs")
    properties = statProperties[kind]
    plt.ylabel(properties[0])
    plt.title(properties[1])
    plt.tight_layout()
    target = filebase + "_" + kind
    plt.savefig(target)

171 | | ||||

def normalizeValues(data, countField, factor):
    """Turn values into rates: divide every column except countField by factor.

    The frame is modified in place; nothing is returned.
    """
    scaled = [name for name in data.keys() if name != countField]
    data[scaled] = data[scaled] / factor

175 | | ||||

176 | | ||||

def setRadarFigure(titles):
    """Create a 9x9 figure holding a radar chart with one axis per title.

    Returns a dict with the radar axes under 'ax' and the axis angles
    under 'theta'.
    """
    figure = plt.figure(figsize=(9, 9))
    matplotlib.rcParams.update({'font.size': 13})
    angles = radar_factory(len(titles))
    radar = figure.add_axes([0.1, 0.1, 0.8, 0.8], projection='radar')
    # Radial grid lines and their labels.
    radar.set_rgrids([0.2, 0.4, 0.6, 0.8, 1, 2, 3, 4, 5, 10])
    radar.set_varlabels(titles)
    # Annotation placed at radius 1 on the third axis.
    radar.text(angles[2], 1, "Linear->Log", horizontalalignment='center',
               color='green', fontsize=18)
    return {'ax': radar, 'theta': angles}

189 | | ||||

190 | | ||||

def drawRadarChart(data, kind, filebase, params, color):
    """Plot data on the radar axes described by params.

    Values >= 1 are drawn as log10(value) on a blue line, all other
    values are drawn unchanged on a red line; each line holds 0 where the
    other one carries the value.  The radial range runs from 0 to the
    largest log value, rounded up.  filebase and kind only name the
    legend entries; color is accepted but not used.
    """
    axes = params['ax']
    angles = params['theta']
    linear_part = data * 0
    log_part = data * 0
    for name in data.keys():
        value = data[name]
        if value >= 1:
            log_part[name] = np.log10(value)
        else:
            linear_part[name] = value
    legend_base = filebase + "_" + kind
    axes.plot(angles, log_part, color='b', label=legend_base + "_log")
    axes.plot(angles, linear_part, color='r', label=legend_base + "_linear")
    axes.legend(loc='best', bbox_to_anchor=(1.4, 1.2))
    upper = np.ceil(max(log_part))
    axes.set_rlim((0, upper))

204 | | ||||

def multiAppBarChartSettings(ax, plt, index, width, n, tmp, s):
    """Apply the shared settings of a grouped bar chart for statistic s.

    Sets a log y-axis, the legend, tick positions at
    index + width * n / 2, tick labels taken from the keys of
    tmp[s]['Total'], and the axis labels and title from statProperties[s].
    """
    ax.set_yscale('log')
    ax.legend()
    tick_positions = index + width * n / 2
    ax.set_xticks(tick_positions)
    tick_labels = tmp[s]['Total'].keys()
    ax.set_xticklabels(tick_labels, rotation=50, horizontalalignment='right')
    plt.xlabel("OMP Constructs")
    properties = statProperties[s]
    plt.ylabel(properties[0])
    plt.title(properties[1])
    plt.tight_layout()

214 | | ||||

def derivedTimerStats(data):
    """Express each timer as a percentage of 'OMP_worker_thread_life'.

    Args:
        data: Mapping (dict or pandas Series) from timer name to value.

    Returns:
        A dict mapping timer name to 100 * value / worker thread life,
        for the keys that precede the first non-timer entry.

    Raises:
        KeyError: If a percentage is needed but data has no
            'OMP_worker_thread_life' entry.
    """
    stopKeys = ('FOR_static_iterations', 'OMP_PARALLEL_args',
                'OMP_set_numthreads', 'FOR_dynamic_iterations')
    # Look the total up once, before the loop.  It used to be bound only
    # when its key was reached during iteration, so any timer listed
    # before it failed with UnboundLocalError.
    totalRuntime = data.get('OMP_worker_thread_life')
    stats = {}
    for key in data.keys():
        if key == 'OMP_worker_thread_life':
            continue
        if key in stopKeys:
            # NOTE(review): processing stops at the first of these keys
            # instead of skipping it; kept as in the original - confirm
            # that this is intended.
            break
        if totalRuntime is None:
            raise KeyError("'OMP_worker_thread_life' is needed to derive "
                           "timer percentages")
        stats[key] = 100 * data[key] / totalRuntime
    return stats

226 | | ||||

def compPie(data):
    """Split data into compute and non-compute entries.

    Args:
        data: Mapping from statistic name to value.

    Returns:
        A two-element list [compKeys, nonCompKeys] of dicts.  The first
        holds the entries whose name is one of the compute constructs
        listed below, the second holds all other entries.  Both dicts are
        also printed.
    """
    computeNames = frozenset((
        'OMP_critical', 'OMP_single', 'OMP_serial',
        'OMP_parallel', 'OMP_master', 'OMP_task_immediate',
        'OMP_task_taskwait', 'OMP_task_taskyield', 'OMP_task_taskgroup',
        'OMP_task_join_bar', 'OMP_task_plain_bar'))
    compKeys = {}
    nonCompKeys = {}
    for key in data.keys():
        target = compKeys if key in computeNames else nonCompKeys
        target[key] = data[key]
    # One pre-formatted string prints the same text on Python 2 and 3.
    print("comp keys: %s \n\n non comp keys: %s" % (compKeys, nonCompKeys))
    return [compKeys, nonCompKeys]

240 | | ||||

def drawMainPie(data, filebase, colors):
    """Save a two-slice pie chart: total compute vs. total non-compute time.

    data is the [compute, non-compute] pair of dicts; the chart is saved
    to the file named filebase + "_main_pie".
    """
    compute_total = sum(data[0].values())
    other_total = sum(data[1].values())
    sizes = [compute_total, other_total]
    legend_labels = ["Compute - %.2f" % compute_total,
                     "Non Compute - %.2f" % other_total]
    pie_parts = plt.pie(sizes, [0, 0], colors=colors, startangle=90)
    plt.title("Time Division")
    plt.axis('equal')
    # The first element returned by pie() supplies the legend handles.
    plt.legend(pie_parts[0], legend_labels, loc='best',
               bbox_to_anchor=(-0.1, 1), fontsize=16)
    plt.savefig(filebase + "_main_pie", bbox_inches='tight')

250 | | ||||

def drawSubPie(data, tag, filebase, colors):
    """Save a pie chart with one slice per entry of data.

    Args:
        data: Dict mapping a name to its value.
        tag: Chart title prefix; also used in the output file name.
        filebase: Prefix of the output file name.
        colors: Slice colours handed to plt.pie().

    Every legend entry shows the name and its share of the total of data
    in percent; the title shows the total itself.  The chart is saved to
    the file named filebase + "_" + tag.
    """
    # Materialise names and values as lists: on Python 3 the dict views
    # returned by keys()/values() can be neither indexed nor assigned to,
    # which the original code relied on.
    names = list(data.keys())
    sizes = list(data.values())
    total = sum(sizes)
    labels = [name + " - %.2f" % (100 * size / total)
              for name, size in zip(names, sizes)]
    explode = [0] * len(sizes)
    patches = plt.pie(sizes, explode=explode, colors=colors, startangle=90)
    plt.title(tag+"(Percentage of Total:"+" %.2f" % total+")")
    plt.tight_layout()
    plt.axis('equal')
    # The first element returned by pie() supplies the legend handles.
    plt.legend(patches[0], labels, loc='best', bbox_to_anchor=(-0.1,1), fontsize=16)
    plt.savefig(filebase+"_"+tag, bbox_inches='tight')

267 | | ||||

def main():
    """Parse the command line and summarize every stats file named on it.

    For each kind in interestingStats and each file:
      * counters: the counter table is normalized by the mean
        'OMP_worker_thread_life' time and a radar chart of the 'Total'
        column is saved;
      * timers: rows whose name starts with 'Total' are dropped, the
        remaining totals are turned into percentages of the worker thread
        life, three pie charts are saved and the percentages are written
        to derivedStats_<filebase>.csv.
    Files that cannot be read are skipped.
    """
    parser = argparse.ArgumentParser(description='''This script takes a list
        of files containing each of which contain output from a stats-gathering
        enabled OpenMP runtime library.  Each stats file is read, parsed, and
        used to produce a summary of the statistics''')
    parser.add_argument('files', nargs='+',
        help='files to parse which contain stats-gathering output')
    command_args = parser.parse_args()
    colors = ['orange', 'b', 'r', 'yellowgreen', 'lightsage', 'lightpink',
              'green', 'purple', 'yellow', 'cyan', 'mediumturquoise',
              'olive']
    stats = {}
    matplotlib.rcParams.update({'font.size':22})
    for s in interestingStats:
        # Kept for its side effect: this opens the figure that is current
        # when the main pie chart is drawn below.
        plt.subplots()
        n = 0

        for f in command_args.files:
            filebase = os.path.splitext(f)[0]
            tmp = readFile(f)
            if tmp is None:
                # readFile() already reported the problem; skip the file
                # instead of failing on tmp[s] with a TypeError.
                continue
            # NOTE(review): for counters this column is taken before
            # normalizeValues() runs; whether the plotted values reflect
            # the normalization depends on pandas view/copy semantics -
            # confirm.
            data = tmp[s]['Total']
            if s == 'counters':
                elapsedTime = tmp["timers"]["Mean"]["OMP_worker_thread_life"]
                normalizeValues(tmp["counters"], "SampleCount",
                    elapsedTime / 1.e9)
                # Plotting radar charts
                params = setRadarFigure(data.keys())
                chartType = "radar"
                # Wrap around the palette so that more files than colours
                # do not raise IndexError.
                drawRadarChart(data, s, filebase, params,
                               colors[n % len(colors)])
                plt.savefig(filebase+"_"+s+"_"+chartType, bbox_inches='tight')
            elif s == 'timers':
                print("overheads in "+filebase)
                # Prevent repetition by removing rows such as
                # Total_OMP_work, since Total_OMP_work['Total'] is the same
                # as OMP_work['Total'].  Iterate over a snapshot of the
                # keys because entries are deleted inside the loop.
                for key in list(data.keys()):
                    if key[0:5] == 'Total':
                        del data[key]
                stats[filebase] = derivedTimerStats(data)
                dataSubSet = compPie(stats[filebase])
                drawMainPie(dataSubSet, filebase, colors)
                plt.figure(0)
                drawSubPie(dataSubSet[0], "Computational Time", filebase, colors)
                plt.figure(1)
                drawSubPie(dataSubSet[1], "Non Computational Time", filebase, colors)
                with open('derivedStats_{}.csv'.format(filebase), 'w') as out:
                    out.write('================={}====================\n'.format(filebase))
                    # list() is needed on Python 3, where items() is a view.
                    out.write(pd.DataFrame(list(stats[filebase].items())).to_csv()+'\n')
            n += 1
    plt.close()

if __name__ == "__main__":
    main()