From 1583c6e326a8454d3c806763620e1329bf6b7cbe Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 12 Dec 2022 09:50:43 -0500 Subject: [PATCH] GH-100143: Improve collecting pystats for parts of runs (GH-100144) * pystats off by default * Add -Xpystats flag * Always dump pystats, even if turned off --- ...-12-09-14-27-36.gh-issue-100143.5g9rb4.rst | 3 ++ Python/initconfig.c | 15 +++++- Python/specialize.c | 7 +-- Tools/scripts/summarize_stats.py | 48 +++++++++++-------- 4 files changed, 48 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst new file mode 100644 index 00000000000000..20a25f8b03d1d2 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst @@ -0,0 +1,3 @@ +When built with ``--enable-pystats``, stats collection is now off by +default. To enable it early at startup, pass the ``-Xpystats`` flag. Stats +are now always dumped, even if switched off. diff --git a/Python/initconfig.c b/Python/initconfig.c index 64ae987b3f34d9..d05099cd997790 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -129,7 +129,14 @@ The following implementation-specific options are available:\n\ \n\ -X int_max_str_digits=number: limit the size of int<->str conversions.\n\ This helps avoid denial of service attacks when parsing untrusted data.\n\ - The default is sys.int_info.default_max_str_digits. 0 disables."; + The default is sys.int_info.default_max_str_digits. 0 disables." + +#ifdef Py_STATS +"\n\ +\n\ +-X pystats: Enable pystats collection at startup." +#endif +; /* Envvars that don't have equivalent command-line options are listed first */ static const char usage_envvars[] = @@ -2186,6 +2193,12 @@ config_read(PyConfig *config, int compute_path_config) config->show_ref_count = 1; } +#ifdef Py_STATS + if (config_get_xoption(config, L"pystats")) { + _py_stats = &_py_stats_struct; + } +#endif + status = config_read_complex_options(config); if (_PyStatus_EXCEPTION(status)) { return status; diff --git a/Python/specialize.c b/Python/specialize.c index 7545a7712493e6..785088eac8c528 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -18,7 +18,7 @@ #ifdef Py_STATS PyStats _py_stats_struct = { 0 }; -PyStats *_py_stats = &_py_stats_struct; +PyStats *_py_stats = NULL; #define ADD_STAT_TO_DICT(res, field) \ do { \ @@ -205,9 +205,6 @@ _Py_StatsClear(void) void _Py_PrintSpecializationStats(int to_file) { - if (_py_stats == NULL) { - return; - } FILE *out = stderr; if (to_file) { /* Write to a file instead of stderr. */ @@ -238,7 +235,7 @@ _Py_PrintSpecializationStats(int to_file) else { fprintf(out, "Specialization stats:\n"); } - print_stats(out, _py_stats); + print_stats(out, &_py_stats_struct); if (out != stderr) { fclose(out); } diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index c15501bdc761e7..c30a60e9514bda 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -34,6 +34,16 @@ TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count" +def format_ratio(num, den): + """ + Format a ratio as a percentage. When the denominator is 0, returns the empty + string. + """ + if den == 0: + return "" + else: + return f"{num/den:.01%}" + def join_rows(a_rows, b_rows): """ Joins two tables together, side-by-side, where the first column in each is a @@ -87,7 +97,7 @@ def calculate_specialization_stats(family_stats, total): continue else: label = key - rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%")) + rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total))) return rows def calculate_specialization_success_failure(family_stats): @@ -100,7 +110,7 @@ def calculate_specialization_success_failure(family_stats): label = key[len("specialization."):] label = label[0].upper() + label[1:] val = family_stats.get(key, 0) - rows.append((label, val, f"{100*val/total_attempts:0.1f}%")) + rows.append((label, val, format_ratio(val, total_attempts))) return rows def calculate_specialization_failure_kinds(name, family_stats, defines): @@ -118,7 +128,7 @@ def calculate_specialization_failure_kinds(name, family_stats, defines): for value, index in failures: if not value: continue - rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%")) + rows.append((kind_to_text(index, defines, name), value, format_ratio(value, total_failures))) return rows def print_specialization_stats(name, family_stats, defines): @@ -318,11 +328,11 @@ def calculate_execution_counts(opcode_stats, total): for (count, name, miss) in counts: cumulative += count if miss: - miss = f"{100*miss/count:0.1f}%" + miss = format_ratio(miss, count) else: miss = "" - rows.append((name, count, f"{100*count/total:0.1f}%", - f"{100*cumulative/total:0.1f}%", miss)) + rows.append((name, count, format_ratio(count, total), + format_ratio(cumulative, total), miss)) return rows def emit_execution_counts(opcode_stats, total): @@ -386,9 +396,9 @@ def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats): def calculate_specialization_effectiveness(opcode_stats, total): basic, not_specialized, specialized = categorized_counts(opcode_stats) return [ - ("Basic", basic, f"{basic*100/total:0.1f}%"), - ("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"), - ("Specialized", specialized, f"{specialized*100/total:0.1f}%"), + ("Basic", basic, format_ratio(basic, total)), + ("Not specialized", not_specialized, format_ratio(not_specialized, total)), + ("Specialized", specialized, format_ratio(specialized, total)), ] def emit_specialization_overview(opcode_stats, total): @@ -405,7 +415,7 @@ def emit_specialization_overview(opcode_stats, total): counts.sort(reverse=True) if total: with Section(f"{title} by instruction", 3): - rows = [ (name, count, f"{100*count/total:0.1f}%") for (count, name) in counts[:10] ] + rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ] emit_table(("Name", "Count:", "Ratio:"), rows) def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total): @@ -432,15 +442,15 @@ def calculate_call_stats(stats): rows = [] for key, value in stats.items(): if "Calls to" in key: - rows.append((key, value, f"{100*value/total:0.1f}%")) + rows.append((key, value, format_ratio(value, total))) elif key.startswith("Calls "): name, index = key[:-1].split("[") index = int(index) label = name + " (" + pretty(defines[index][0]) + ")" - rows.append((label, value, f"{100*value/total:0.1f}%")) + rows.append((label, value, format_ratio(value, total))) for key, value in stats.items(): if key.startswith("Frame"): - rows.append((key, value, f"{100*value/total:0.1f}%")) + rows.append((key, value, format_ratio(value, total))) return rows def emit_call_stats(stats): @@ -468,13 +478,13 @@ def calculate_object_stats(stats): for key, value in stats.items(): if key.startswith("Object"): if "materialize" in key: - ratio = f"{100*value/total_materializations:0.1f}%" + ratio = format_ratio(value, total_materializations) elif "allocations" in key: - ratio = f"{100*value/total_allocations:0.1f}%" + ratio = format_ratio(value, total_allocations) elif "increfs" in key: - ratio = f"{100*value/total_increfs:0.1f}%" + ratio = format_ratio(value, total_increfs) elif "decrefs" in key: - ratio = f"{100*value/total_decrefs:0.1f}%" + ratio = format_ratio(value, total_decrefs) else: ratio = "" label = key[6:].strip() @@ -517,8 +527,8 @@ def emit_pair_counts(opcode_stats, total): for (count, pair) in itertools.islice(pair_counts, 100): i, j = pair cumulative += count - rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%", - f"{100*cumulative/total:0.1f}%")) + rows.append((opname[i] + " " + opname[j], count, format_ratio(count, total), + format_ratio(cumulative, total))) emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows )