blob: 67a68ebe750b582097d667e90fe14888389ba730 [file] [log] [blame]
"""Class for printing reports on profiled Python code."""
2
3# Written by James Roskind
4# Based on prior profile module by Sjoerd Mullender...
5# which was hacked somewhat by: Guido van Rossum
6
7# Copyright Disney Enterprises, Inc. All Rights Reserved.
8# Licensed to PSF under a Contributor Agreement
9#
10# Licensed under the Apache License, Version 2.0 (the "License");
11# you may not use this file except in compliance with the License.
12# You may obtain a copy of the License at
13#
14# http://www.apache.org/licenses/LICENSE-2.0
15#
16# Unless required by applicable law or agreed to in writing, software
17# distributed under the License is distributed on an "AS IS" BASIS,
18# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
19# either express or implied. See the License for the specific language
20# governing permissions and limitations under the License.
21
22
23import sys
24import os
25import time
26import marshal
27import re
28from enum import Enum
29from functools import cmp_to_key
30
31__all__ = ["Stats", "SortKey"]
32
33
class SortKey(str, Enum):
    """Names of the sort orders accepted by Stats.sort_stats().

    Each member is a str whose value is the first name listed; any further
    names on the same line are aliases that look up the same member, so
    SortKey('ncalls') is SortKey.CALLS.
    """

    CALLS = 'calls', 'ncalls'
    CUMULATIVE = 'cumulative', 'cumtime'
    FILENAME = 'filename', 'module'
    LINE = 'line'
    NAME = 'name'
    NFL = 'nfl'
    PCALLS = 'pcalls'
    STDNAME = 'stdname'
    TIME = 'time', 'tottime'

    def __new__(cls, *values):
        # The first name is the canonical value; the rest are aliases.
        primary, *aliases = values
        member = str.__new__(cls, primary)
        member._value_ = primary
        # Register every alias in the by-value lookup table so that
        # SortKey(alias) resolves to this member.
        for alias in aliases:
            cls._value2member_map_[alias] = member
        member._all_values = values
        return member
53
54
class Stats:
    """Create reports from data generated by the Profile class.

    This class is a "friend" of Profile: it imports data either by direct
    access to members of a Profile object, or by reading in a dictionary
    that was emitted (via marshal) from the Profile class.

    The big change from the previous Profiler (in terms of raw
    functionality) is that an add() method has been provided to combine
    Stats from several distinct profile runs.  Both the constructor and the
    add() method take arbitrarily many file names as arguments.

    All the print methods take arguments that restrict what is printed.
    A float in [0, 1.0) is taken as the fraction of the available lines to
    print (e.g. .1 means print 10% of all available lines).  An integer is
    taken as the number of lines of data to print.  A string is taken as a
    regular expression that the standard function name must match.

    sort_stats() accepts, in addition to the old -1, 0, 1 or 2 (interpreted
    as 'stdname', 'calls', 'time' and 'cumulative' respectively), an
    arbitrary number of quoted strings or SortKey members selecting the
    sort order.

    For example sort_stats('time', 'name') or sort_stats(SortKey.TIME,
    SortKey.NAME) sorts on the major key of 'internal function time' and on
    the minor key of 'the name of the function'.  See sort_arg_dict_default
    and get_sort_arg_defs() for the full table.

    All methods return self, so you can string together commands like:

        (Stats('foo', 'goo').strip_dirs().sort_stats('calls')
                            .print_stats(5).print_callers(5))
    """

    def __init__(self, *args, stream=None):
        """Load the first argument and merge in all remaining ones.

        Each positional argument may be a file name written by dump_stats()
        or an object providing create_stats().  Reports are printed to
        *stream* (sys.stdout when omitted).
        """
        self.stream = stream or sys.stdout
        if not args:
            arg = None
        else:
            arg = args[0]
            args = args[1:]
        self.init(arg)
        self.add(*args)

    def init(self, arg):
        """Reset all state, then load *arg* and compute the totals."""
        self.all_callees = None  # calc only if needed
        self.files = []
        self.fcn_list = None
        self.total_tt = 0
        self.total_calls = 0
        self.prim_calls = 0
        self.max_name_len = 0
        self.top_level = set()
        self.stats = {}
        self.sort_arg_dict = {}
        self.load_stats(arg)
        try:
            self.get_top_level_stats()
        except Exception:
            print("Invalid timing data %s" %
                  (self.files[-1] if self.files else ''), file=self.stream)
            raise

    def load_stats(self, arg):
        """Fill self.stats from *arg*.

        *arg* may be None (empty statistics), a file name holding marshalled
        statistics, or an object with a create_stats() method whose .stats
        dictionary is taken over.

        Raises TypeError when no statistics could be obtained from *arg*.

        NOTE: the file is read with marshal.load(); only load files from
        sources you trust.
        """
        if arg is None:
            self.stats = {}
            return
        elif isinstance(arg, str):
            with open(arg, 'rb') as f:
                self.stats = marshal.load(f)
            try:
                file_stats = os.stat(arg)
                arg = time.ctime(file_stats.st_mtime) + " " + arg
            except (OSError, OverflowError, ValueError):
                # Best effort only: without a usable mtime the bare file
                # name is recorded.  Narrowed from a bare "except:" so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                pass
            self.files = [arg]
        elif hasattr(arg, 'create_stats'):
            arg.create_stats()
            self.stats = arg.stats
            arg.stats = {}
        if not self.stats:
            raise TypeError("Cannot create or construct a %r object from %r"
                            % (self.__class__, arg))
        return

    def get_top_level_stats(self):
        """Accumulate call/time totals and the longest function name."""
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            self.total_calls += nc
            self.prim_calls += cc
            self.total_tt += tt
            # NOTE(review): a function counts as top level when this exact
            # pseudo-caller key appears among its callers; presumably it is
            # meant to match the key the profiler records for top-level
            # code -- confirm against the Profile implementation.
            if ("jprofile", 0, "profiler") in callers:
                self.top_level.add(func)
            name_len = len(func_std_string(func))
            if name_len > self.max_name_len:
                self.max_name_len = name_len

    def add(self, *arg_list):
        """Merge further statistics into this object and return self.

        Each argument is either a Stats instance (subclasses included) or
        anything the Stats constructor accepts.
        """
        if not arg_list:
            return self
        for item in reversed(arg_list):
            # isinstance() rather than an exact type comparison, so that
            # instances of Stats subclasses are merged directly instead of
            # being passed to the constructor (which rejects them).
            if not isinstance(item, Stats):
                item = Stats(item)
            self.files += item.files
            self.total_calls += item.total_calls
            self.prim_calls += item.prim_calls
            self.total_tt += item.total_tt
            self.top_level.update(item.top_level)

            if self.max_name_len < item.max_name_len:
                self.max_name_len = item.max_name_len

            # Both derived structures are stale once new data is merged.
            self.fcn_list = None
            self.all_callees = None

            for func, stat in item.stats.items():
                old_func_stat = self.stats.get(func, (0, 0, 0, 0, {}))
                self.stats[func] = add_func_stats(old_func_stat, stat)
        return self

    def dump_stats(self, filename):
        """Write the profile data to a file we know how to load back."""
        with open(filename, 'wb') as f:
            marshal.dump(self.stats, f)

    # List the tuple indices and directions for sorting, along with some
    # printable description.  The indices refer to the tuples built in
    # sort_stats(): (cc, nc, tt, ct, filename, line, name, std_name, func).
    # A direction of -1 sorts descending, 1 ascending.
    sort_arg_dict_default = {
        "calls":      (((1, -1),), "call count"),
        "ncalls":     (((1, -1),), "call count"),
        "cumtime":    (((3, -1),), "cumulative time"),
        "cumulative": (((3, -1),), "cumulative time"),
        "filename":   (((4, 1),), "file name"),
        "line":       (((5, 1),), "line number"),
        "module":     (((4, 1),), "file name"),
        "name":       (((6, 1),), "function name"),
        "nfl":        (((6, 1), (4, 1), (5, 1),), "name/file/line"),
        "pcalls":     (((0, -1),), "primitive call count"),
        "stdname":    (((7, 1),), "standard name"),
        "time":       (((2, -1),), "internal time"),
        "tottime":    (((2, -1),), "internal time"),
    }

    def get_sort_arg_defs(self):
        """Expand all abbreviations that are unique."""
        if not self.sort_arg_dict:
            self.sort_arg_dict = abbrevs = {}
            ambiguous = set()
            for word, tup in self.sort_arg_dict_default.items():
                # Register the word and every shorter prefix of it, until a
                # prefix is hit that some earlier word already claimed.
                fragment = word
                while fragment:
                    if fragment in abbrevs:
                        ambiguous.add(fragment)
                        break
                    abbrevs[fragment] = tup
                    fragment = fragment[:-1]
            for word in ambiguous:
                del abbrevs[word]
        return self.sort_arg_dict

    def sort_stats(self, *field):
        """Order the function list by the given sort keys and return self.

        Keys are strings (unique abbreviations allowed) or SortKey members,
        most significant first.  A single int of -1, 0, 1 or 2 is accepted
        for compatibility with the old profiler.  With no keys the ordering
        is cleared.

        Raises TypeError when keys of different types are mixed, and
        KeyError for an unknown key.
        """
        if not field:
            self.fcn_list = 0
            return self
        if len(field) == 1 and isinstance(field[0], int):
            # Be compatible with old profiler
            field = [{-1: "stdname",
                      0: "calls",
                      1: "time",
                      2: "cumulative"}[field[0]]]
        elif len(field) >= 2:
            for arg in field[1:]:
                if type(arg) != type(field[0]):
                    raise TypeError("Can't have mixed argument type")

        sort_arg_defs = self.get_sort_arg_defs()

        sort_tuple = ()
        self.sort_type = ""
        connector = ""
        for word in field:
            if isinstance(word, SortKey):
                word = word.value
            sort_tuple = sort_tuple + sort_arg_defs[word][0]
            self.sort_type += connector + sort_arg_defs[word][1]
            connector = ", "

        stats_list = []
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            stats_list.append((cc, nc, tt, ct) + func +
                              (func_std_string(func), func))

        stats_list.sort(key=cmp_to_key(TupleComp(sort_tuple).compare))

        # The last element of every entry is the original func key.
        self.fcn_list = [entry[-1] for entry in stats_list]
        return self

    def reverse_order(self):
        """Reverse the current function ordering, if any; return self."""
        if self.fcn_list:
            self.fcn_list.reverse()
        return self

    def strip_dirs(self):
        """Drop directory parts from all file names and return self.

        Entries that become identical after stripping are merged.  The
        ordering and the callee cache are invalidated.
        """
        oldstats = self.stats
        self.stats = newstats = {}
        max_name_len = 0
        for func, (cc, nc, tt, ct, callers) in oldstats.items():
            newfunc = func_strip_path(func)
            name_len = len(func_std_string(newfunc))
            if name_len > max_name_len:
                max_name_len = name_len
            newcallers = {}
            for func2, caller in callers.items():
                newcallers[func_strip_path(func2)] = caller

            if newfunc in newstats:
                newstats[newfunc] = add_func_stats(
                    newstats[newfunc],
                    (cc, nc, tt, ct, newcallers))
            else:
                newstats[newfunc] = (cc, nc, tt, ct, newcallers)
        old_top = self.top_level
        self.top_level = new_top = set()
        for func in old_top:
            new_top.add(func_strip_path(func))

        self.max_name_len = max_name_len

        self.fcn_list = None
        self.all_callees = None
        return self

    def calc_callees(self):
        """Build self.all_callees (caller -> {callee: data}) if not cached."""
        if self.all_callees:
            return
        self.all_callees = all_callees = {}
        for func, (cc, nc, tt, ct, callers) in self.stats.items():
            all_callees.setdefault(func, {})
            for func2, caller in callers.items():
                all_callees.setdefault(func2, {})[func] = caller
        return

    #******************************************************************
    # The following functions support actual printing of reports
    #******************************************************************

    # Optional "amount" is either a line count, or a percentage of lines.

    def eval_print_amount(self, sel, list, msg):
        """Apply one restriction *sel* to *list*.

        A str is a regular expression searched in each standard function
        name, a float in [0, 1.0) keeps that fraction of the entries, and
        an int in [0, len(list)) keeps that many entries.  Any other value
        leaves the list unchanged.

        Returns (new_list, msg), with a note appended to *msg* when the
        list was shortened or the regular expression was invalid.
        """
        new_list = list
        if isinstance(sel, str):
            try:
                rex = re.compile(sel)
            except re.error:
                msg += " <Invalid regular expression %r>\n" % sel
                return new_list, msg
            new_list = [func for func in list
                        if rex.search(func_std_string(func))]
        else:
            count = len(list)
            if isinstance(sel, float) and 0.0 <= sel < 1.0:
                count = int(count * sel + .5)
                new_list = list[:count]
            elif isinstance(sel, int) and 0 <= sel < count:
                count = sel
                new_list = list[:count]
        if len(list) != len(new_list):
            msg += " List reduced from %r to %r due to restriction <%r>\n" % (
                len(list), len(new_list), sel)

        return new_list, msg

    def get_print_list(self, sel_list):
        """Return (name column width, functions to print) for *sel_list*.

        Prints the ordering/restriction message to the stream unless the
        resulting list is empty, in which case (0, []) is returned.
        """
        width = self.max_name_len
        if self.fcn_list:
            stat_list = self.fcn_list[:]
            msg = " Ordered by: " + self.sort_type + '\n'
        else:
            stat_list = [*self.stats]
            msg = " Random listing order was used\n"

        for selection in sel_list:
            stat_list, msg = self.eval_print_amount(selection, stat_list, msg)

        count = len(stat_list)

        if not stat_list:
            return 0, stat_list
        print(msg, file=self.stream)
        if count < len(self.stats):
            # Only a subset is shown: size the column to what is printed.
            width = max(len(func_std_string(func)) for func in stat_list)
        return width+2, stat_list

    def print_stats(self, *amount):
        """Print the summary and one line per selected function."""
        for filename in self.files:
            print(filename, file=self.stream)
        if self.files:
            print(file=self.stream)
        indent = ' ' * 8
        for func in self.top_level:
            print(indent, func_get_function_name(func), file=self.stream)

        print(indent, self.total_calls, "function calls", end=' ', file=self.stream)
        if self.total_calls != self.prim_calls:
            print("(%d primitive calls)" % self.prim_calls, end=' ', file=self.stream)
        print("in %.3f seconds" % self.total_tt, file=self.stream)
        print(file=self.stream)
        _width, funcs = self.get_print_list(amount)
        if funcs:
            self.print_title()
            for func in funcs:
                self.print_line(func)
            print(file=self.stream)
            print(file=self.stream)
        return self

    def print_callees(self, *amount):
        """Print, for each selected function, the functions it called."""
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.calc_callees()

            self.print_call_heading(width, "called...")
            for func in funcs:
                if func in self.all_callees:
                    self.print_call_line(width, func, self.all_callees[func])
                else:
                    self.print_call_line(width, func, {})
            print(file=self.stream)
            print(file=self.stream)
        return self

    def print_callers(self, *amount):
        """Print, for each selected function, the functions that called it."""
        width, funcs = self.get_print_list(amount)
        if funcs:
            self.print_call_heading(width, "was called by...")
            for func in funcs:
                cc, nc, tt, ct, callers = self.stats[func]
                self.print_call_line(width, func, callers, "<-")
            print(file=self.stream)
            print(file=self.stream)
        return self

    def print_call_heading(self, name_size, column_title):
        """Print the heading used by print_callees() and print_callers()."""
        print("Function ".ljust(name_size) + column_title, file=self.stream)
        # print sub-header only if we have new-style callers
        subheader = False
        for cc, nc, tt, ct, callers in self.stats.values():
            if callers:
                value = next(iter(callers.values()))
                subheader = isinstance(value, tuple)
                break
        if subheader:
            # Padded to sit over the columns written by print_call_line():
            # the arrow and separator (3 chars), a 7-wide call count, then
            # two space-separated 8-wide time fields.
            print(" "*name_size + "    ncalls  tottime  cumtime", file=self.stream)

    def print_call_line(self, name_size, source, call_dict, arrow="->"):
        """Print *source* followed by one line per entry of *call_dict*."""
        print(func_std_string(source).ljust(name_size) + arrow, end=' ', file=self.stream)
        if not call_dict:
            print(file=self.stream)
            return
        clist = sorted(call_dict.keys())
        indent = ""
        for func in clist:
            name = func_std_string(func)
            value = call_dict[func]
            if isinstance(value, tuple):
                nc, cc, tt, ct = value
                if nc != cc:
                    substats = '%d/%d' % (nc, cc)
                else:
                    substats = '%d' % (nc,)
                substats = '%s %s %s %s' % (substats.rjust(7+2*len(indent)),
                                            f8(tt), f8(ct), name)
                left_width = name_size + 1
            else:
                substats = '%s(%r) %s' % (name, value, f8(self.stats[func][3]))
                left_width = name_size + 3
            print(indent*left_width + substats, file=self.stream)
            # Every line after the first is indented under the first entry.
            indent = " "

    def print_title(self):
        """Print the column headings for print_stats()."""
        # Padded to sit over the columns written by print_line(): a 9-wide
        # call count followed by four space-separated 8-wide numbers.
        print('   ncalls  tottime  percall  cumtime  percall', end=' ', file=self.stream)
        print('filename:lineno(function)', file=self.stream)

    def print_line(self, func):  # hack: should print percentages
        """Print the statistics line for a single function."""
        cc, nc, tt, ct, callers = self.stats[func]
        c = str(nc)
        if nc != cc:
            c = c + '/' + str(cc)
        print(c.rjust(9), end=' ', file=self.stream)
        print(f8(tt), end=' ', file=self.stream)
        if nc == 0:
            # No calls recorded: leave the per-call column blank.
            print(' '*8, end=' ', file=self.stream)
        else:
            print(f8(tt/nc), end=' ', file=self.stream)
        print(f8(ct), end=' ', file=self.stream)
        if cc == 0:
            print(' '*8, end=' ', file=self.stream)
        else:
            print(f8(ct/cc), end=' ', file=self.stream)
        print(func_std_string(func), file=self.stream)
467
class TupleComp:
    """Generic comparison of two tuples on selected fields.

    An instance is built from a sequence of (tuple-index, direction) pairs,
    ordered from most significant to least significant.  A direction of 1
    sorts that field ascending and -1 descending.  The compare() method can
    then serve as an old-style comparison function (e.g. wrapped with
    functools.cmp_to_key) when a list of tuples has to be sorted in the
    instance's order.
    """

    def __init__(self, comp_select_list):
        self.comp_select_list = comp_select_list

    def compare(self, left, right):
        """Return a negative, zero or positive number ordering the tuples."""
        for position, sense in self.comp_select_list:
            lhs = left[position]
            rhs = right[position]
            if lhs < rhs:
                return -sense
            if lhs > rhs:
                return sense
        # Every selected field compared equal.
        return 0
488
489
490#**************************************************************************
491# func_name is a triple (file:string, line:int, name:string)
492
def func_strip_path(func_name):
    """Return the (file, line, name) triple with the file's directory removed."""
    path, lineno, funcname = func_name
    stripped = os.path.basename(path)
    return stripped, lineno, funcname
496
def func_get_function_name(func):
    """Return the function-name element (index 2) of a func triple."""
    name = func[2]
    return name
499
def func_std_string(func_name):  # match what old profile produced
    """Return the printable standard name for a func triple.

    Ordinary functions are rendered as "file:line(name)".  Entries whose
    file is '~' and whose line is 0 (built-in functions) are rendered as
    their bare name, with a surrounding <...> rewritten to {...}.
    """
    if func_name[:2] != ('~', 0):
        return "%s:%d(%s)" % func_name
    # special case for built-in functions
    builtin_name = func_name[2]
    if builtin_name.startswith('<') and builtin_name.endswith('>'):
        return '{%s}' % builtin_name[1:-1]
    return builtin_name
510
511#**************************************************************************
# The following functions combine statistics for pairs of functions.
513# The bulk of the processing involves correctly handling "call" lists,
514# such as callers and callees.
515#**************************************************************************
516
def add_func_stats(target, source):
    """Add together all the stats for two profile entries.

    Both arguments are (cc, nc, tt, ct, callers) tuples.  The numeric
    fields are summed and the caller dictionaries are merged with
    add_callers(); a new tuple is returned.
    """
    src_cc, src_nc, src_tt, src_ct, src_callers = source
    dst_cc, dst_nc, dst_tt, dst_ct, dst_callers = target
    total_cc = src_cc + dst_cc
    total_nc = src_nc + dst_nc
    total_tt = src_tt + dst_tt
    total_ct = src_ct + dst_ct
    merged_callers = add_callers(dst_callers, src_callers)
    return (total_cc, total_nc, total_tt, total_ct, merged_callers)
523
def add_callers(target, source):
    """Combine two caller dictionaries into a new one.

    Neither argument is modified.  For a caller present in both, tuple
    values are added element by element and any other value is added
    with +=.
    """
    merged = dict(target.items())
    for func, caller in source.items():
        if func not in merged:
            merged[func] = caller
        elif isinstance(caller, tuple):
            # format used by cProfile
            merged[func] = tuple(i + j for i, j in zip(caller, merged[func]))
        else:
            # format used by profile
            merged[func] += caller
    return merged
540
def count_calls(callers):
    """Sum the caller statistics to get total number of calls received."""
    total = 0
    for call_count in callers.values():
        total += call_count
    return total
546 return nc
547
548#**************************************************************************
549# The following functions support printing of reports
550#**************************************************************************
551
def f8(x):
    """Format a number in a field 8 characters wide with 3 decimals."""
    formatted = "%8.3f" % x
    return formatted
554
555#**************************************************************************
556# Statistics browser added by ESR, April 2001
557#**************************************************************************
558
if __name__ == '__main__':
    import cmd
    try:
        # Imported only for its effect on the interactive prompt (line
        # editing and completion); the browser works without it.
        import readline
    except ImportError:
        pass

    class ProfileBrowser(cmd.Cmd):
        """Interactive command loop for browsing a Stats object."""

        def __init__(self, profile=None):
            cmd.Cmd.__init__(self)
            self.prompt = "% "
            self.stats = None
            self.stream = sys.stdout
            if profile is not None:
                self.do_read(profile)

        def generic(self, fn, line):
            """Call the Stats method named *fn* with the parsed *line*.

            Each word is passed as an int if it parses as one, else as a
            float in [0, 1] if it parses as one, else as a string (a
            regular expression).  Out-of-range fractions are reported and
            dropped.
            """
            args = line.split()
            processed = []
            for term in args:
                try:
                    processed.append(int(term))
                    continue
                except ValueError:
                    pass
                try:
                    frac = float(term)
                    if frac > 1 or frac < 0:
                        print("Fraction argument must be in [0, 1]", file=self.stream)
                        continue
                    processed.append(frac)
                    continue
                except ValueError:
                    pass
                processed.append(term)
            if self.stats:
                getattr(self.stats, fn)(*processed)
            else:
                print("No statistics object is loaded.", file=self.stream)
            return 0

        def generic_help(self):
            print("Arguments may be:", file=self.stream)
            print("* An integer maximum number of entries to print.", file=self.stream)
            print("* A decimal fractional number between 0 and 1, controlling", file=self.stream)
            print(" what fraction of selected entries to print.", file=self.stream)
            print("* A regular expression; only entries with function names", file=self.stream)
            print(" that match it are printed.", file=self.stream)

        def do_add(self, line):
            if self.stats:
                try:
                    self.stats.add(line)
                except OSError as e:
                    print("Failed to load statistics for %s: %s" % (line, e), file=self.stream)
                except Exception as e:
                    # A file that is not valid profile data must not end
                    # the session; report it the same way do_read() does.
                    print(e.__class__.__name__ + ':', e, file=self.stream)
            else:
                print("No statistics object is loaded.", file=self.stream)
            return 0

        def help_add(self):
            print("Add profile info from given file to current statistics object.", file=self.stream)

        def do_callees(self, line):
            return self.generic('print_callees', line)

        def help_callees(self):
            print("Print callees statistics from the current stat object.", file=self.stream)
            self.generic_help()

        def do_callers(self, line):
            return self.generic('print_callers', line)

        def help_callers(self):
            print("Print callers statistics from the current stat object.", file=self.stream)
            self.generic_help()

        def do_EOF(self, line):
            print("", file=self.stream)
            return 1

        def help_EOF(self):
            print("Leave the profile browser.", file=self.stream)

        def do_quit(self, line):
            return 1

        def help_quit(self):
            print("Leave the profile browser.", file=self.stream)

        def do_read(self, line):
            """Load *line* as the current statistics file.

            With an empty argument the file named in the prompt is
            reloaded.
            """
            if line:
                try:
                    self.stats = Stats(line)
                except OSError as err:
                    # strerror is None for an OSError built from a single
                    # argument; fall back to the exception text instead of
                    # failing on err.args[1].
                    print(err.strerror if err.strerror is not None else err,
                          file=self.stream)
                    return
                except Exception as err:
                    print(err.__class__.__name__ + ':', err, file=self.stream)
                    return
                self.prompt = line + "% "
            elif len(self.prompt) > 2:
                # The prompt is "<file>% "; strip the suffix to get the name.
                line = self.prompt[:-2]
                self.do_read(line)
            else:
                print("No statistics object is current -- cannot reload.", file=self.stream)
            return 0

        def help_read(self):
            print("Read in profile data from a specified file.", file=self.stream)
            print("Without argument, reload the current file.", file=self.stream)

        def do_reverse(self, line):
            if self.stats:
                self.stats.reverse_order()
            else:
                print("No statistics object is loaded.", file=self.stream)
            return 0

        def help_reverse(self):
            print("Reverse the sort order of the profiling report.", file=self.stream)

        def do_sort(self, line):
            if not self.stats:
                print("No statistics object is loaded.", file=self.stream)
                return
            abbrevs = self.stats.get_sort_arg_defs()
            if line and all((x in abbrevs) for x in line.split()):
                self.stats.sort_stats(*line.split())
            else:
                print("Valid sort keys (unique prefixes are accepted):", file=self.stream)
                for (key, value) in Stats.sort_arg_dict_default.items():
                    print("%s -- %s" % (key, value[1]), file=self.stream)
            return 0

        def help_sort(self):
            print("Sort profile data according to specified keys.", file=self.stream)
            print("(Typing `sort' without arguments lists valid keys.)", file=self.stream)

        def complete_sort(self, text, *args):
            return [a for a in Stats.sort_arg_dict_default if a.startswith(text)]

        def do_stats(self, line):
            return self.generic('print_stats', line)

        def help_stats(self):
            print("Print statistics from the current stat object.", file=self.stream)
            self.generic_help()

        def do_strip(self, line):
            if self.stats:
                self.stats.strip_dirs()
            else:
                print("No statistics object is loaded.", file=self.stream)

        def help_strip(self):
            print("Strip leading path information from filenames in the report.", file=self.stream)

        def help_help(self):
            print("Show help for a given command.", file=self.stream)

        def postcmd(self, stop, line):
            if stop:
                return stop
            return None

    # The first command-line argument is loaded as the current profile and
    # any further ones are merged into it.
    if len(sys.argv) > 1:
        initprofile = sys.argv[1]
    else:
        initprofile = None
    try:
        browser = ProfileBrowser(initprofile)
        for profile in sys.argv[2:]:
            browser.do_add(profile)
        print("Welcome to the profile statistics browser.", file=browser.stream)
        browser.cmdloop()
        print("Goodbye.", file=browser.stream)
    except KeyboardInterrupt:
        pass
725
726# That's all, folks.