@@ -419,6 +419,28 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
419419 char new_dir_first_char = new_dir [0 ];
420420 int first_time_in_loop = 1 ;
421421
422+ if (!info -> setup )
423+ /*
424+ * info->setup is 0 here in two cases: (1) all auxiliary
425+ * vars (like dirs_removed) were NULL so
426+ * initialize_dir_rename_info() returned early, or (2)
427+ * either break detection or copy detection are active so
428+ * that we never called initialize_dir_rename_info(). In
429+ * the former case, we don't have enough info to know if
430+ * directories were renamed (because dirs_removed lets us
431+ * know about a necessary prerequisite, namely if they were
432+ * removed), and in the latter, we don't care about
433+ * directory renames or find_basename_matches.
434+ *
435+ * This matters because both basename and inexact matching
436+ * will also call update_dir_rename_counts(). In either of
437+ * the above two cases info->dir_rename_counts will not
438+ * have been properly initialized which prevents us from
439+ * updating it, but in these two cases we don't care about
440+ * dir_rename_counts anyway, so we can just exit early.
441+ */
442+ return ;
443+
422444 while (1 ) {
423445 dirname_munge (old_dir );
424446 dirname_munge (new_dir );
@@ -479,45 +501,29 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
479501 free (new_dir );
480502}
481503
482- static void compute_dir_rename_counts (struct dir_rename_info * info ,
483- struct strset * dirs_removed ,
484- struct strmap * dir_rename_count )
504+ static void initialize_dir_rename_info (struct dir_rename_info * info ,
505+ struct strset * dirs_removed ,
506+ struct strmap * dir_rename_count )
485507{
486508 int i ;
487509
488- info -> setup = 1 ;
489- info -> dir_rename_count = dir_rename_count ;
490-
491- for (i = 0 ; i < rename_dst_nr ; ++ i ) {
492- /* File not part of directory rename counts if not a rename */
493- if (!rename_dst [i ].is_rename )
494- continue ;
495-
496- /*
497- * Make dir_rename_count contain a map of a map:
498- * old_directory -> {new_directory -> count}
499- * In other words, for every pair look at the directories for
500- * the old filename and the new filename and count how many
501- * times that pairing occurs.
502- */
503- update_dir_rename_counts (info , dirs_removed ,
504- rename_dst [i ].p -> one -> path ,
505- rename_dst [i ].p -> two -> path );
510+ if (!dirs_removed ) {
511+ info -> setup = 0 ;
512+ return ;
506513 }
507- }
508-
509- static void initialize_dir_rename_info (struct dir_rename_info * info )
510- {
511- int i ;
512-
513514 info -> setup = 1 ;
514515
516+ info -> dir_rename_count = dir_rename_count ;
517+ if (!info -> dir_rename_count ) {
518+ info -> dir_rename_count = xmalloc (sizeof (* dir_rename_count ));
519+ strmap_init (info -> dir_rename_count );
520+ }
515521 strintmap_init_with_options (& info -> idx_map , -1 , NULL , 0 );
516522 strmap_init_with_options (& info -> dir_rename_guess , NULL , 0 );
517- info -> dir_rename_count = NULL ;
518523
519524 /*
520- * Loop setting up both info->idx_map.
525+ * Loop setting up both info->idx_map, and doing setup of
526+ * info->dir_rename_count.
521527 */
522528 for (i = 0 ; i < rename_dst_nr ; ++ i ) {
523529 /*
@@ -527,7 +533,20 @@ static void initialize_dir_rename_info(struct dir_rename_info *info)
527533 if (!rename_dst [i ].is_rename ) {
528534 char * filename = rename_dst [i ].p -> two -> path ;
529535 strintmap_set (& info -> idx_map , filename , i );
536+ continue ;
530537 }
538+
539+ /*
540+ * For everything else (i.e. renamed files), make
541+ * dir_rename_count contain a map of a map:
542+ * old_directory -> {new_directory -> count}
543+ * In other words, for every pair look at the directories for
544+ * the old filename and the new filename and count how many
545+ * times that pairing occurs.
546+ */
547+ update_dir_rename_counts (info , dirs_removed ,
548+ rename_dst [i ].p -> one -> path ,
549+ rename_dst [i ].p -> two -> path );
531550 }
532551}
533552
@@ -682,7 +701,8 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
682701
683702static int find_basename_matches (struct diff_options * options ,
684703 int minimum_score ,
685- struct dir_rename_info * info )
704+ struct dir_rename_info * info ,
705+ struct strset * dirs_removed )
686706{
687707 /*
688708 * When I checked in early 2020, over 76% of file renames in linux
@@ -810,6 +830,8 @@ static int find_basename_matches(struct diff_options *options,
810830 continue ;
811831 record_rename_pair (dst_index , src_index , score );
812832 renames ++ ;
833+ update_dir_rename_counts (info , dirs_removed ,
834+ one -> path , two -> path );
813835
814836 /*
815837 * Found a rename so don't need text anymore; if we
@@ -893,7 +915,12 @@ static int too_many_rename_candidates(int num_destinations, int num_sources,
893915 return 1 ;
894916}
895917
896- static int find_renames (struct diff_score * mx , int dst_cnt , int minimum_score , int copies )
918+ static int find_renames (struct diff_score * mx ,
919+ int dst_cnt ,
920+ int minimum_score ,
921+ int copies ,
922+ struct dir_rename_info * info ,
923+ struct strset * dirs_removed )
897924{
898925 int count = 0 , i ;
899926
@@ -910,6 +937,9 @@ static int find_renames(struct diff_score *mx, int dst_cnt, int minimum_score, i
910937 continue ;
911938 record_rename_pair (mx [i ].dst , mx [i ].src , mx [i ].score );
912939 count ++ ;
940+ update_dir_rename_counts (info , dirs_removed ,
941+ rename_src [mx [i ].src ].p -> one -> path ,
942+ rename_dst [mx [i ].dst ].p -> two -> path );
913943 }
914944 return count ;
915945}
@@ -981,6 +1011,8 @@ void diffcore_rename_extended(struct diff_options *options,
9811011 info .setup = 0 ;
9821012 assert (!dir_rename_count || strmap_empty (dir_rename_count ));
9831013 want_copies = (detect_rename == DIFF_DETECT_COPY );
1014+ if (dirs_removed && (break_idx || want_copies ))
1015+ BUG ("dirs_removed incompatible with break/copy detection" );
9841016 if (!minimum_score )
9851017 minimum_score = DEFAULT_RENAME_SCORE ;
9861018
@@ -1074,14 +1106,15 @@ void diffcore_rename_extended(struct diff_options *options,
10741106
10751107 /* Preparation for basename-driven matching. */
10761108 trace2_region_enter ("diff" , "dir rename setup" , options -> repo );
1077- initialize_dir_rename_info (& info );
1109+ initialize_dir_rename_info (& info ,
1110+ dirs_removed , dir_rename_count );
10781111 trace2_region_leave ("diff" , "dir rename setup" , options -> repo );
10791112
10801113 /* Utilize file basenames to quickly find renames. */
10811114 trace2_region_enter ("diff" , "basename matches" , options -> repo );
10821115 rename_count += find_basename_matches (options ,
10831116 min_basename_score ,
1084- & info );
1117+ & info , dirs_removed );
10851118 trace2_region_leave ("diff" , "basename matches" , options -> repo );
10861119
10871120 /*
@@ -1167,18 +1200,15 @@ void diffcore_rename_extended(struct diff_options *options,
11671200 /* cost matrix sorted by most to least similar pair */
11681201 STABLE_QSORT (mx , dst_cnt * NUM_CANDIDATE_PER_DST , score_compare );
11691202
1170- rename_count += find_renames (mx , dst_cnt , minimum_score , 0 );
1203+ rename_count += find_renames (mx , dst_cnt , minimum_score , 0 ,
1204+ & info , dirs_removed );
11711205 if (want_copies )
1172- rename_count += find_renames (mx , dst_cnt , minimum_score , 1 );
1206+ rename_count += find_renames (mx , dst_cnt , minimum_score , 1 ,
1207+ & info , dirs_removed );
11731208 free (mx );
11741209 trace2_region_leave ("diff" , "inexact renames" , options -> repo );
11751210
11761211 cleanup :
1177- /*
1178- * Now that renames have been computed, compute dir_rename_count */
1179- if (dirs_removed && dir_rename_count )
1180- compute_dir_rename_counts (& info , dirs_removed , dir_rename_count );
1181-
11821212 /* At this point, we have found some renames and copies and they
11831213 * are recorded in rename_dst. The original list is still in *q.
11841214 */
0 commit comments