FIPS_PUERTO_RICO_POPULATION_URL = "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt?"
STATE_HHS_FILE = "hhs.txt"
ZIP_POP_MISSING_FILE = "zip_pop_filling.csv"
# County-group numbering file; read and (re)written by assign_county_groups().
CHNG_COUNTY_GROUPS_FILE = "chng_county_groups.csv"

# Out files
FIPS_STATE_OUT_FILENAME = "fips_state_table.csv"
FIPS_MSA_OUT_FILENAME = "fips_msa_table.csv"
FIPS_HRR_OUT_FILENAME = "fips_hrr_table.csv"
FIPS_ZIP_OUT_FILENAME = "fips_zip_table.csv"
FIPS_HHS_FILENAME = "fips_hhs_table.csv"
# Written by derive_fips_chngfips_crosswalk().
FIPS_CHNGFIPS_OUT_FILENAME = "fips_chng-fips_table.csv"
FIPS_POPULATION_OUT_FILENAME = "fips_pop.csv"

# Written by derive_chngfips_state_crosswalk().
CHNGFIPS_STATE_OUT_FILENAME = "chng-fips_state_table.csv"
ZIP_HSA_OUT_FILENAME = "zip_hsa_table.csv"
ZIP_HRR_OUT_FILENAME = "zip_hrr_table.csv"
ZIP_FIPS_OUT_FILENAME = "zip_fips_table.csv"
@@ -475,6 +478,176 @@ def derive_zip_hhs_crosswalk():
475478 zip_state .sort_values (["zip" , "hhs" ]).to_csv (join (OUTPUT_DIR , ZIP_HHS_FILENAME ), index = False )
476479
477480
def derive_fips_chngfips_crosswalk():
    """Write the FIPS -> CHNG-FIPS crosswalk table to the output directory.

    Counties listed in the county-groups file are mapped to a group code of
    the form ``<state_fips>g<group>`` (both parts zero-padded to two
    characters); every other FIPS code found in the FIPS -> state table maps
    to itself. The FIPS -> state table is generated first if it is missing,
    and the county-groups file is refreshed via ``assign_county_groups``.
    """
    fips_state_path = join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME)
    if not isfile(fips_state_path):
        derive_fips_state_crosswalk()

    assign_county_groups()
    groups_wide = pd.read_csv(CHNG_COUNTY_GROUPS_FILE, dtype="string", index_col=False)

    # Expand the pipe-separated member list into one column per member.
    member_columns = groups_wide.fips_list.str.split("|", expand=True)
    groups_wide = pd.concat([groups_wide, member_columns], axis=1)
    groups_wide = groups_wide.drop(columns="fips_list")

    # Reshape to one row per (group, member county); groups with fewer
    # members than the widest group leave missing cells, dropped here.
    groups_long = pd.melt(
        groups_wide,
        id_vars=["state_fips", "group"],
        var_name="county_num",
        value_name="fips",
    )
    groups_long = groups_long.drop(columns="county_num").dropna()

    state_part = groups_long["state_fips"].str.zfill(2)
    group_part = groups_long["group"].str.zfill(2)
    member_fips = groups_long["fips"].str.zfill(5).astype("string")

    # A CHNG FIPS code for a grouped county is state code + "g" + group id.
    grouped = pd.DataFrame(
        {"fips": member_fips, "chng-fips": state_part + "g" + group_part}
    )

    fips_to_state = pd.read_csv(fips_state_path, dtype="string", index_col=False)

    # Ungrouped counties keep their ordinary FIPS code as their CHNG code.
    ungrouped_codes = list(set(fips_to_state.fips).difference(grouped.fips))
    ungrouped = pd.DataFrame(
        {"fips": ungrouped_codes, "chng-fips": ungrouped_codes}, dtype="string"
    )

    crosswalk = pd.concat([grouped, ungrouped]).sort_values(["fips", "chng-fips"])
    crosswalk.to_csv(join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME), index=False)
528+
529+
def derive_chngfips_state_crosswalk():
    """Write the CHNG-FIPS -> state crosswalk table to the output directory.

    Each row of the FIPS -> CHNG-FIPS table is left-joined to its row in the
    FIPS -> state table; the county ``fips`` column is then dropped and
    duplicate rows removed, leaving one row per CHNG code and state. Either
    prerequisite table is generated first if it is missing from the output
    directory.
    """
    fips_state_path = join(OUTPUT_DIR, FIPS_STATE_OUT_FILENAME)
    fips_chngfips_path = join(OUTPUT_DIR, FIPS_CHNGFIPS_OUT_FILENAME)

    if not isfile(fips_state_path):
        derive_fips_state_crosswalk()
    if not isfile(fips_chngfips_path):
        derive_fips_chngfips_crosswalk()

    chng_by_fips = pd.read_csv(fips_chngfips_path, dtype="string", index_col=False)
    state_by_fips = pd.read_csv(fips_state_path, dtype="string", index_col=False)

    joined = chng_by_fips.join(state_by_fips.set_index("fips"), on="fips", how="left")
    # Many counties share one CHNG code, so collapse the repeats.
    crosswalk = joined.drop(columns="fips").drop_duplicates()
    crosswalk = crosswalk.sort_values(["chng-fips", "state_code"])
    crosswalk.to_csv(join(OUTPUT_DIR, CHNGFIPS_STATE_OUT_FILENAME), index=False)
550+
551+
def fetch_county_groups_spreadsheet(source=None):
    """Load the CHNG county groupings as a two-column DataFrame.

    By default the "groupings" sheet of the county mapping spreadsheet
    https://docs.google.com/spreadsheets/d/1PEce4CjjHbRM1Z5xEMNI6Xsq_b2kkCh0/edit#gid=871427657
    is requested in CSV format.

    Args:
        source: Optional path, URL, or file-like object accepted by
            ``pandas.read_csv`` to read instead of the Google sheet (e.g. a
            locally exported copy). It must contain the "state FIPS" and
            "county FIPS grouping" columns.

    Returns:
        DataFrame with columns ``state_fips`` (int) and ``fips_list``
        (string). Each ``fips_list`` value lists the county FIPS codes of one
        group, concatenated and separated by the pipe "|".

    Raises:
        KeyError: if either expected column is missing from the input.
    """
    if source is None:
        sheet_id = "1PEce4CjjHbRM1Z5xEMNI6Xsq_b2kkCh0"
        sheet_name = "groupings"
        # Request sheet in CSV format via tag in URL. The URL must not
        # contain any whitespace around the interpolated values.
        source = (
            f"https://docs.google.com/spreadsheets/d/{sheet_id}"
            f"/gviz/tq?tqx=out:csv&sheet={sheet_name}"
        )

    county_groups = pd.read_csv(
        source, dtype="string", index_col=False
    ).dropna(
        # Drop columns that are entirely empty.
        how="all", axis=1
    )
    county_groups["state FIPS"] = county_groups["state FIPS"].astype(int)

    new_names = {
        "state FIPS": "state_fips",
        "county FIPS grouping": "fips_list",
    }

    # Keep only the renamed columns, in the order declared above.
    return county_groups.rename(columns=new_names)[list(new_names.values())]
579+
580+
def assign_county_groups():
    """Create the on-disk county-group numbering file, or verify it is current.

    Fetches the county groupings with ``fetch_county_groups_spreadsheet``.
    When ``CHNG_COUNTY_GROUPS_FILE`` does not exist, each grouping is given a
    1-indexed group number within its state (in the fetched order) and the
    result is written to that file, sorted by state.

    When the file already exists, previously assigned group numbers must
    remain fixed, even if a given county group is no longer being used. If
    every fetched ``fips_list`` value is already present in the file, the
    function returns without modifying it.

    Raises:
        NotImplementedError: if the fetched groupings contain a ``fips_list``
            value that is not in the existing file; merging old and new
            groupings automatically is not supported.
    """
    county_groups = fetch_county_groups_spreadsheet()

    # If a county groups mapping file already exists in `data_proc/geomap`, we
    # have to be careful to not reassign a group number to a different group.
    if isfile(CHNG_COUNTY_GROUPS_FILE):
        old_county_groups = pd.read_csv(CHNG_COUNTY_GROUPS_FILE, dtype="string", index_col=False)
        old_county_groups.group = old_county_groups.group.astype(int)
        old_county_groups.state_fips = old_county_groups.state_fips.astype(int)

        # Remove rows from county_groups if that `fips_list` value already
        # exists in old_county_groups.
        county_groups = county_groups[
            ~county_groups.fips_list.isin(old_county_groups.fips_list)
        ]

        # If grouping file has no new rows, no need to process again.
        if county_groups.empty:
            return

        # Grouping spreadsheet contains rows not seen in old, on-disk county
        # groupings file. Combining the two is delicate. While the code below
        # appears to work, it has not been formally tested and could be
        # invalid for even small changes to the format of the input county
        # groupings file.
        raise NotImplementedError(
            "Can't combine old and new county groupings automatically, "
            "code below is not tested or robust to changes in input format. "
            "We recommend manually working with the code below and the new "
            "data in a REPL."
        )

        # NOTE: everything from here to the end of this branch is
        # intentionally unreachable; it is kept as the reference procedure
        # that the error message above points to.

        # Assign an incrementing integer to be the group id of each remaining
        # county grouping within a state using the given sort order.
        county_groups["group"] = county_groups.groupby("state_fips").cumcount() + 1

        # Find max group number by state in old_county_groups, join on, and
        # add max group number to group number.
        max_group_by_state = old_county_groups.groupby(
            "state_fips"
        ).group.max(
        ).reset_index(
        ).rename(
            columns={"group": "max_group"}
        )
        county_groups = county_groups.join(
            max_group_by_state.set_index("state_fips"),
            how="left",
            on="state_fips"
        ).assign(
            group=lambda x: x.group + x.max_group
        ).drop(
            ["max_group"], axis=1
        )

        # Combine old_county_groups and county_groups
        county_groups = pd.concat([old_county_groups, county_groups])
    else:
        # Group numbers are 1-indexed.
        county_groups["group"] = county_groups.groupby("state_fips").cumcount() + 1

    # A stable sort keeps the within-state order that the group numbers
    # were assigned in.
    county_groups.sort_values(
        ["state_fips"], kind="stable"
    ).to_csv(
        CHNG_COUNTY_GROUPS_FILE, index=False
    )
649+
650+
478651def clear_dir (dir_path : str ):
479652 for fname in listdir (dir_path ):
480653 remove (join (dir_path , fname ))
@@ -501,3 +674,5 @@ def clear_dir(dir_path: str):
501674 derive_zip_population_table ()
502675 derive_fips_hhs_crosswalk ()
503676 derive_zip_hhs_crosswalk ()
677+ derive_fips_chngfips_crosswalk ()
678+ derive_chngfips_state_crosswalk ()
0 commit comments