@@ -41,6 +41,15 @@ static struct survey_refs_wanted default_ref_options = {
4141struct survey_opts {
4242 int verbose ;
4343 int show_progress ;
44+
45+ int show_largest_commits_by_nr_parents ;
46+ int show_largest_commits_by_size_bytes ;
47+
48+ int show_largest_trees_by_nr_entries ;
49+ int show_largest_trees_by_size_bytes ;
50+
51+ int show_largest_blobs_by_size_bytes ;
52+
4453 int top_nr ;
4554 struct survey_refs_wanted refs ;
4655};
@@ -138,6 +147,87 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
138147 pbin -> cnt_seen ++ ;
139148}
140149
150+ /*
151+ * Remember the largest n objects for some scaling dimension. This
152+ * could be the observed object size or number of entries in a tree.
153+ * We'll use this to generate a sorted vector in the output for that
154+ * dimension.
155+ */
156+ struct large_item {
157+ uint64_t size ;
158+ struct object_id oid ;
159+ };
160+
161+ struct large_item_vec {
162+ char * dimension_label ;
163+ char * item_label ;
164+ uint64_t nr_items ;
165+ struct large_item items [FLEX_ARRAY ]; /* nr_items */
166+ };
167+
168+ static struct large_item_vec * alloc_large_item_vec (const char * dimension_label ,
169+ const char * item_label ,
170+ uint64_t nr_items )
171+ {
172+ struct large_item_vec * vec ;
173+ size_t flex_len = nr_items * sizeof (struct large_item );
174+
175+ if (!nr_items )
176+ return NULL ;
177+
178+ vec = xcalloc (1 , (sizeof (struct large_item_vec ) + flex_len ));
179+ vec -> dimension_label = strdup (dimension_label );
180+ vec -> item_label = strdup (item_label );
181+ vec -> nr_items = nr_items ;
182+
183+ return vec ;
184+ }
185+
186+ static void free_large_item_vec (struct large_item_vec * vec )
187+ {
188+ if (!vec )
189+ return ;
190+
191+ free (vec -> dimension_label );
192+ free (vec -> item_label );
193+ free (vec );
194+ }
195+
196+ static void maybe_insert_large_item (struct large_item_vec * vec ,
197+ uint64_t size ,
198+ struct object_id * oid )
199+ {
200+ size_t rest_len ;
201+ size_t k ;
202+
203+ if (!vec || !vec -> nr_items )
204+ return ;
205+
206+ /*
207+ * Since the odds an object being among the largest n
208+ * is small, shortcut and see if it is smaller than
209+ * the smallest one in our set and quickly reject it.
210+ */
211+ if (size < vec -> items [vec -> nr_items - 1 ].size )
212+ return ;
213+
214+ for (k = 0 ; k < vec -> nr_items ; k ++ ) {
215+ if (size < vec -> items [k ].size )
216+ continue ;
217+
218+ /* push items[k..] down one and insert it here */
219+
220+ rest_len = (vec -> nr_items - k - 1 ) * sizeof (struct large_item );
221+ if (rest_len )
222+ memmove (& vec -> items [k + 1 ], & vec -> items [k ], rest_len );
223+
224+ memset (& vec -> items [k ], 0 , sizeof (struct large_item ));
225+ vec -> items [k ].size = size ;
226+ oidcpy (& vec -> items [k ].oid , oid );
227+ return ;
228+ }
229+ }
230+
141231/*
142232 * Common fields for any type of object.
143233 */
@@ -183,6 +273,9 @@ struct survey_stats_commits {
183273 * Count of commits with k parents.
184274 */
185275 uint32_t parent_cnt_pbin [PBIN_VEC_LEN ];
276+
277+ struct large_item_vec * vec_largest_by_nr_parents ;
278+ struct large_item_vec * vec_largest_by_size_bytes ;
186279};
187280
188281/*
@@ -192,11 +285,18 @@ struct survey_stats_trees {
192285 struct survey_stats_base_object base ;
193286
194287 /*
195- * In the following, nr_entries refers to the number of files or
196- * subdirectories in a tree. We are interested in how wide the
197- * tree is and if the repo has gigantic directories.
288+ * Keep a vector of the trees with the most number of entries.
289+ * This gives us a feel for the width of a tree when there are
290+ * gigantic directories.
198291 */
199- uint64_t max_entries ; /* max(nr_entries) -- the width of the largest tree */
292+ struct large_item_vec * vec_largest_by_nr_entries ;
293+
294+ /*
295+ * Keep a vector of the trees with the largest size in bytes.
296+ * The contents of this may or may not match items in the other
297+ * vector, since entryname length can alter the results.
298+ */
299+ struct large_item_vec * vec_largest_by_size_bytes ;
200300
201301 /*
202302 * Computing the sum of the number of entries across all trees
@@ -216,6 +316,11 @@ struct survey_stats_trees {
216316 */
217317struct survey_stats_blobs {
218318 struct survey_stats_base_object base ;
319+
320+ /*
321+ * Remember the OIDs of the largest n blobs.
322+ */
323+ struct large_item_vec * vec_largest_by_size_bytes ;
219324};
220325
221326struct survey_report_object_summary {
@@ -396,6 +501,12 @@ struct survey_context {
396501
397502static void clear_survey_context (struct survey_context * ctx )
398503{
504+ free_large_item_vec (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents );
505+ free_large_item_vec (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes );
506+ free_large_item_vec (ctx -> report .reachable_objects .trees .vec_largest_by_nr_entries );
507+ free_large_item_vec (ctx -> report .reachable_objects .trees .vec_largest_by_size_bytes );
508+ free_large_item_vec (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes );
509+
399510 ref_array_clear (& ctx -> ref_array );
400511 strvec_clear (& ctx -> refs );
401512}
@@ -608,6 +719,32 @@ static void survey_report_commit_parents(struct survey_context *ctx)
608719 clear_table (& table );
609720}
610721
722+ static void survey_report_largest_vec (struct large_item_vec * vec )
723+ {
724+ struct survey_table table = SURVEY_TABLE_INIT ;
725+ struct strbuf size = STRBUF_INIT ;
726+
727+ if (!vec || !vec -> nr_items )
728+ return ;
729+
730+ table .table_name = vec -> dimension_label ;
731+ strvec_pushl (& table .header , "Size" , "OID" , NULL );
732+
733+ for (size_t k = 0 ; k < vec -> nr_items ; k ++ ) {
734+ struct large_item * pk = & vec -> items [k ];
735+ if (!is_null_oid (& pk -> oid )) {
736+ strbuf_reset (& size );
737+ strbuf_addf (& size , "%" PRIuMAX , (uintmax_t )pk -> size );
738+
739+ insert_table_rowv (& table , size .buf , oid_to_hex (& pk -> oid ), NULL );
740+ }
741+ }
742+ strbuf_release (& size );
743+
744+ print_table_plaintext (& table );
745+ clear_table (& table );
746+ }
747+
611748static void survey_report_plaintext_refs (struct survey_context * ctx )
612749{
613750 struct survey_report_ref_summary * refs = & ctx -> report .refs ;
@@ -787,6 +924,12 @@ static void survey_report_plaintext(struct survey_context *ctx)
787924 & ctx -> report .top_paths_by_inflate [REPORT_TYPE_TREE ]);
788925 survey_report_plaintext_sorted_size (
789926 & ctx -> report .top_paths_by_inflate [REPORT_TYPE_BLOB ]);
927+
928+ survey_report_largest_vec (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents );
929+ survey_report_largest_vec (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes );
930+ survey_report_largest_vec (ctx -> report .reachable_objects .trees .vec_largest_by_nr_entries );
931+ survey_report_largest_vec (ctx -> report .reachable_objects .trees .vec_largest_by_size_bytes );
932+ survey_report_largest_vec (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes );
790933}
791934
792935/*
@@ -858,6 +1001,27 @@ static int survey_load_config_cb(const char *var, const char *value,
8581001 ctx -> opts .show_progress = git_config_bool (var , value );
8591002 return 0 ;
8601003 }
1004+ if (!strcmp (var , "survey.showcommitparents" )) {
1005+ ctx -> opts .show_largest_commits_by_nr_parents = git_config_ulong (var , value , cctx -> kvi );
1006+ return 0 ;
1007+ }
1008+ if (!strcmp (var , "survey.showcommitsizes" )) {
1009+ ctx -> opts .show_largest_commits_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1010+ return 0 ;
1011+ }
1012+
1013+ if (!strcmp (var , "survey.showtreeentries" )) {
1014+ ctx -> opts .show_largest_trees_by_nr_entries = git_config_ulong (var , value , cctx -> kvi );
1015+ return 0 ;
1016+ }
1017+ if (!strcmp (var , "survey.showtreesizes" )) {
1018+ ctx -> opts .show_largest_trees_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1019+ return 0 ;
1020+ }
1021+ if (!strcmp (var , "survey.showblobsizes" )) {
1022+ ctx -> opts .show_largest_blobs_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1023+ return 0 ;
1024+ }
8611025 if (!strcmp (var , "survey.top" )) {
8621026 ctx -> opts .top_nr = git_config_bool (var , value );
8631027 return 0 ;
@@ -1069,6 +1233,9 @@ static void increment_totals(struct survey_context *ctx,
10691233
10701234 ctx -> report .reachable_objects .commits .parent_cnt_pbin [k ]++ ;
10711235 base = & ctx -> report .reachable_objects .commits .base ;
1236+
1237+ maybe_insert_large_item (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents , k , & commit -> object .oid );
1238+ maybe_insert_large_item (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes , object_length , & commit -> object .oid );
10721239 break ;
10731240 }
10741241 case OBJ_TREE : {
@@ -1088,8 +1255,8 @@ static void increment_totals(struct survey_context *ctx,
10881255
10891256 pst -> sum_entries += nr_entries ;
10901257
1091- if ( nr_entries > pst -> max_entries )
1092- pst -> max_entries = nr_entries ;
1258+ maybe_insert_large_item ( pst -> vec_largest_by_nr_entries , nr_entries , & tree -> object . oid );
1259+ maybe_insert_large_item ( pst -> vec_largest_by_size_bytes , object_length , & tree -> object . oid ) ;
10931260
10941261 qb = qbin (nr_entries );
10951262 incr_obj_hist_bin (& pst -> entry_qbin [qb ], object_length , disk_sizep );
@@ -1099,6 +1266,8 @@ static void increment_totals(struct survey_context *ctx,
10991266 }
11001267 case OBJ_BLOB :
11011268 base = & ctx -> report .reachable_objects .blobs .base ;
1269+
1270+ maybe_insert_large_item (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes , object_length , & oids -> oid [i ]);
11021271 break ;
11031272 default :
11041273 continue ;
@@ -1307,6 +1476,14 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
13071476 OPT_BOOL_F (0 , "detached" , & ctx .opts .refs .want_detached , N_ ("include detached HEAD" ), PARSE_OPT_NONEG ),
13081477 OPT_BOOL_F (0 , "other" , & ctx .opts .refs .want_other , N_ ("include notes and stashes" ), PARSE_OPT_NONEG ),
13091478
1479+ OPT_INTEGER_F (0 , "commit-parents" , & ctx .opts .show_largest_commits_by_nr_parents , N_ ("show N largest commits by parent count" ), PARSE_OPT_NONEG ),
1480+ OPT_INTEGER_F (0 , "commit-sizes" , & ctx .opts .show_largest_commits_by_size_bytes , N_ ("show N largest commits by size in bytes" ), PARSE_OPT_NONEG ),
1481+
1482+ OPT_INTEGER_F (0 , "tree-entries" , & ctx .opts .show_largest_trees_by_nr_entries , N_ ("show N largest trees by entry count" ), PARSE_OPT_NONEG ),
1483+ OPT_INTEGER_F (0 , "tree-sizes" , & ctx .opts .show_largest_trees_by_size_bytes , N_ ("show N largest trees by size in bytes" ), PARSE_OPT_NONEG ),
1484+
1485+ OPT_INTEGER_F (0 , "blob-sizes" , & ctx .opts .show_largest_blobs_by_size_bytes , N_ ("show N largest blobs by size in bytes" ), PARSE_OPT_NONEG ),
1486+
13101487 OPT_END (),
13111488 };
13121489
@@ -1330,6 +1507,39 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
13301507
13311508 fixup_refs_wanted (& ctx );
13321509
1510+ if (ctx .opts .show_largest_commits_by_nr_parents )
1511+ ctx .report .reachable_objects .commits .vec_largest_by_nr_parents =
1512+ alloc_large_item_vec (
1513+ "largest_commits_by_nr_parents" ,
1514+ "nr_parents" ,
1515+ ctx .opts .show_largest_commits_by_nr_parents );
1516+ if (ctx .opts .show_largest_commits_by_size_bytes )
1517+ ctx .report .reachable_objects .commits .vec_largest_by_size_bytes =
1518+ alloc_large_item_vec (
1519+ "largest_commits_by_size_bytes" ,
1520+ "size" ,
1521+ ctx .opts .show_largest_commits_by_size_bytes );
1522+
1523+ if (ctx .opts .show_largest_trees_by_nr_entries )
1524+ ctx .report .reachable_objects .trees .vec_largest_by_nr_entries =
1525+ alloc_large_item_vec (
1526+ "largest_trees_by_nr_entries" ,
1527+ "nr_entries" ,
1528+ ctx .opts .show_largest_trees_by_nr_entries );
1529+ if (ctx .opts .show_largest_trees_by_size_bytes )
1530+ ctx .report .reachable_objects .trees .vec_largest_by_size_bytes =
1531+ alloc_large_item_vec (
1532+ "largest_trees_by_size_bytes" ,
1533+ "size" ,
1534+ ctx .opts .show_largest_trees_by_size_bytes );
1535+
1536+ if (ctx .opts .show_largest_blobs_by_size_bytes )
1537+ ctx .report .reachable_objects .blobs .vec_largest_by_size_bytes =
1538+ alloc_large_item_vec (
1539+ "largest_blobs_by_size_bytes" ,
1540+ "size" ,
1541+ ctx .opts .show_largest_blobs_by_size_bytes );
1542+
13331543 survey_phase_refs (& ctx );
13341544
13351545 survey_phase_objects (& ctx );
0 commit comments