survey: add pathname of blob or tree to large_item_vec
Include the pathname of each blob or tree in the large_item_vec
to help identify the file or directory associated with the OID
and size information.

This pathname is computed during the path walk, so it reflects the
first pathname observed for that OID during the traversal over all
of the refs.  Since the file or directory could have moved (without
being modified), there may be multiple "correct" pathnames for a
particular OID, and because we do not control the ref traversal
order, the recorded value should be treated as a "suggested
pathname" for the OID.

Signed-off-by: Jeff Hostetler <[email protected]>
jeffhostetler authored and dscho committed Jan 1, 2025
1 parent 73e6cd5 commit f35a2a1
Showing 1 changed file with 32 additions and 11 deletions.
builtin/survey.c: 32 additions & 11 deletions
@@ -156,6 +156,7 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
 struct large_item {
 	uint64_t size;
 	struct object_id oid;
+	struct strbuf name;
 };
 
 struct large_item_vec {
@@ -171,6 +172,7 @@ static struct large_item_vec *alloc_large_item_vec(const char *dimension_label,
 {
 	struct large_item_vec *vec;
 	size_t flex_len = nr_items * sizeof(struct large_item);
+	size_t k;
 
 	if (!nr_items)
 		return NULL;
@@ -180,6 +182,9 @@ static struct large_item_vec *alloc_large_item_vec(const char *dimension_label,
 	vec->dimension_label = strdup(dimension_label);
 	vec->item_label = strdup(item_label);
 	vec->nr_items = nr_items;
 
+	for (k = 0; k < nr_items; k++)
+		strbuf_init(&vec->items[k].name, 0);
+
 	return vec;
 }
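The explicit init loop matters because the flex array is zero-filled at allocation, and an all-zero strbuf is not a validly initialized one (git's STRBUF_INIT points buf at a static slop buffer rather than NULL). A minimal, self-contained sketch of the same flexible-array lifecycle, with a plain char * standing in for struct strbuf (all names here are illustrative, not part of the patch):

#include <stdlib.h>

struct buf { char *p; };            /* stands in for struct strbuf */

struct vec {
	size_t nr_items;
	struct buf items[];         /* C99 flexible array member */
};

static struct vec *vec_alloc(size_t nr_items)
{
	struct vec *v;

	if (!nr_items)
		return NULL;
	v = calloc(1, sizeof(*v) + nr_items * sizeof(struct buf));
	if (!v)
		return NULL;
	v->nr_items = nr_items;
	/* with a real strbuf, each slot would need strbuf_init() here,
	 * because an all-zero strbuf is not a valid initialized strbuf */
	return v;
}

static void vec_free(struct vec *v)
{
	if (!v)
		return;
	for (size_t k = 0; k < v->nr_items; k++)
		free(v->items[k].p); /* release every embedded buffer */
	free(v);
}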

@@ -188,14 +193,18 @@ static void free_large_item_vec(struct large_item_vec *vec)
 	if (!vec)
 		return;
 
+	for (size_t k = 0; k < vec->nr_items; k++)
+		strbuf_release(&vec->items[k].name);
+
 	free(vec->dimension_label);
 	free(vec->item_label);
 	free(vec);
 }
 
 static void maybe_insert_large_item(struct large_item_vec *vec,
 				    uint64_t size,
-				    struct object_id *oid)
+				    struct object_id *oid,
+				    const char *name)
 {
 	size_t rest_len;
 	size_t k;
@@ -215,7 +224,14 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
 		if (size < vec->items[k].size)
 			continue;
 
-		/* push items[k..] down one and insert it here */
+		/*
+		 * The last large_item in the vector is about to be
+		 * overwritten by the previous one during the shift.
+		 * Steal its allocated strbuf and reuse it.
+		 */
+		strbuf_release(&vec->items[vec->nr_items - 1].name);
+
+		/* push items[k..] down one and insert data for this item here */
 
 		rest_len = (vec->nr_items - k - 1) * sizeof(struct large_item);
 		if (rest_len)
@@ -224,6 +240,10 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
 		memset(&vec->items[k], 0, sizeof(struct large_item));
 		vec->items[k].size = size;
 		oidcpy(&vec->items[k].oid, oid);
+		strbuf_init(&vec->items[k].name, 0);
+		if (name && *name)
+			strbuf_addstr(&vec->items[k].name, name);
+
 		return;
 	}
 }
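To see the shift-and-steal invariant in isolation: a self-contained sketch of the same fixed-capacity, descending-order insert, with a plain char * (strdup/free) standing in for struct strbuf. Everything below is illustrative, not part of the patch:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_ITEMS 3

struct item {
	uint64_t size;
	char *name;                 /* stands in for struct strbuf */
};

/* Keep items[] sorted in descending order by size. */
static void maybe_insert(struct item items[NR_ITEMS],
			 uint64_t size, const char *name)
{
	for (size_t k = 0; k < NR_ITEMS; k++) {
		size_t rest;

		if (size < items[k].size)
			continue;

		/* the last slot is about to be overwritten by the
		 * shift, so dispose of its allocation first */
		free(items[NR_ITEMS - 1].name);

		/* push items[k..] down one */
		rest = (NR_ITEMS - k - 1) * sizeof(struct item);
		if (rest)
			memmove(&items[k + 1], &items[k], rest);

		items[k].size = size;
		items[k].name = (name && *name) ? strdup(name) : NULL;
		return;
	}
}

int main(void)
{
	struct item items[NR_ITEMS] = { 0 };

	maybe_insert(items, 100, "a.bin");
	maybe_insert(items, 300, "b.bin");
	maybe_insert(items, 200, "c.bin"); /* lands between b and a */

	for (size_t k = 0; k < NR_ITEMS; k++)
		printf("%llu %s\n", (unsigned long long)items[k].size,
		       items[k].name ? items[k].name : "-");

	for (size_t k = 0; k < NR_ITEMS; k++)
		free(items[k].name);
	return 0;
}

The key point mirrors the comment in the hunk above: after the memmove, slot k and slot k+1 briefly alias the same buffer, and re-initializing slot k (rather than releasing it) leaves ownership with slot k+1, so nothing leaks and nothing is freed twice.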
@@ -728,15 +748,15 @@ static void survey_report_largest_vec(struct large_item_vec *vec)
 		return;
 
 	table.table_name = vec->dimension_label;
-	strvec_pushl(&table.header, "Size", "OID", NULL);
+	strvec_pushl(&table.header, "Size", "OID", "Name", NULL);
 
 	for (size_t k = 0; k < vec->nr_items; k++) {
 		struct large_item *pk = &vec->items[k];
 		if (!is_null_oid(&pk->oid)) {
 			strbuf_reset(&size);
 			strbuf_addf(&size, "%"PRIuMAX, (uintmax_t)pk->size);
 
-			insert_table_rowv(&table, size.buf, oid_to_hex(&pk->oid), NULL);
+			insert_table_rowv(&table, size.buf, oid_to_hex(&pk->oid), pk->name.buf, NULL);
 		}
 	}
 	strbuf_release(&size);
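With the extra column, a "largest blobs" table would render along these lines (sizes, OIDs, and paths invented purely for illustration):

Size      OID          Name
52428800  89acf3...    assets/video/intro.mp4
10485760  11bd0e...    vendor/libfoo.a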
@@ -1197,7 +1217,8 @@ static void increment_object_counts(
 
 static void increment_totals(struct survey_context *ctx,
 			     struct oid_array *oids,
-			     struct survey_report_object_size_summary *summary)
+			     struct survey_report_object_size_summary *summary,
+			     const char *path)
 {
 	for (size_t i = 0; i < oids->nr; i++) {
 		struct object_info oi = OBJECT_INFO_INIT;
@@ -1233,8 +1254,8 @@ static void increment_totals(struct survey_context *ctx,
 			ctx->report.reachable_objects.commits.parent_cnt_pbin[k]++;
 			base = &ctx->report.reachable_objects.commits.base;
 
-			maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents, k, &commit->object.oid);
-			maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes, object_length, &commit->object.oid);
+			maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents, k, &commit->object.oid, NULL);
+			maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes, object_length, &commit->object.oid, NULL);
 			break;
 		}
 		case OBJ_TREE: {
@@ -1254,8 +1275,8 @@ static void increment_totals(struct survey_context *ctx,
 
 			pst->sum_entries += nr_entries;
 
-			maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &tree->object.oid);
-			maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &tree->object.oid);
+			maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &tree->object.oid, path);
+			maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &tree->object.oid, path);
 
 			qb = qbin(nr_entries);
 			incr_obj_hist_bin(&pst->entry_qbin[qb], object_length, disk_sizep);
@@ -1266,7 +1287,7 @@ static void increment_totals(struct survey_context *ctx,
 		case OBJ_BLOB:
 			base = &ctx->report.reachable_objects.blobs.base;
 
-			maybe_insert_large_item(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes, object_length, &oids->oid[i]);
+			maybe_insert_large_item(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes, object_length, &oids->oid[i], path);
 			break;
 		default:
 			continue;
@@ -1306,7 +1327,7 @@ static void increment_object_totals(struct survey_context *ctx,
 	struct survey_report_object_size_summary *total;
 	struct survey_report_object_size_summary summary = { 0 };
 
-	increment_totals(ctx, oids, &summary);
+	increment_totals(ctx, oids, &summary, path);
 
 	switch (type) {
 	case OBJ_COMMIT:
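Threading path through increment_totals completes the chain from the path-walk traversal down to the insertion helper. Roughly, as a comment-style sketch (simplified; only the increment_totals and maybe_insert_large_item signatures actually appear in this patch):

/*
 * path walk (type, oids, path)
 *   -> increment_object_totals(ctx, oids, type, path)
 *        -> increment_totals(ctx, oids, &summary, path)
 *             -> maybe_insert_large_item(..., &oid, path)  trees, blobs
 *             -> maybe_insert_large_item(..., &oid, NULL)  commits (no pathname)
 */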
