diff --git a/claims_hosp/delphi_claims_hosp/backfill.py b/claims_hosp/delphi_claims_hosp/backfill.py index 495abd59b..5e6eb8a3d 100644 --- a/claims_hosp/delphi_claims_hosp/backfill.py +++ b/claims_hosp/delphi_claims_hosp/backfill.py @@ -36,23 +36,29 @@ def store_backfill_file(claims_filepath, _end_date, backfill_dir): dtype=Config.CLAIMS_DTYPES, parse_dates=[Config.CLAIMS_DATE_COL], ) - backfilldata.rename({"ServiceDate": "time_value", - "PatCountyFIPS": "fips", - "Denominator": "den", - "Covid_like": "num"}, - axis=1, inplace=True) - backfilldata = gmpr.add_geocode(backfilldata, from_code="fips", new_code="state_id", - from_col="fips", new_col="state_id") + backfilldata.rename( + { + "ServiceDate": "time_value", + "PatCountyFIPS": "fips", + "Denominator": "den", + "Covid_like": "num", + "Flu1": "num_flu", + }, + axis=1, + inplace=True, + ) + backfilldata = gmpr.add_geocode( + backfilldata, from_code="fips", new_code="state_id", from_col="fips", new_col="state_id" + ) #Store one year's backfill data if _end_date.day == 29 and _end_date.month == 2: _start_date = datetime(_end_date.year-1, 2, 28) else: - _start_date = _end_date.replace(year=_end_date.year-1) - selected_columns = ['time_value', 'fips', 'state_id', - 'den', 'num'] - backfilldata = backfilldata.loc[(backfilldata["time_value"] >= _start_date) - & (~backfilldata["fips"].isnull()), - selected_columns] + _start_date = _end_date.replace(year=_end_date.year - 1) + selected_columns = ["time_value", "fips", "state_id", "den", "num", "num_flu"] + backfilldata = backfilldata.loc[ + (backfilldata["time_value"] >= _start_date) & (~backfilldata["fips"].isnull()), selected_columns + ] backfilldata["lag"] = [(_end_date - x).days for x in backfilldata["time_value"]] backfilldata["time_value"] = backfilldata.time_value.dt.strftime("%Y-%m-%d") diff --git a/claims_hosp/delphi_claims_hosp/config.py b/claims_hosp/delphi_claims_hosp/config.py index b9f1741df..c810b328b 100644 --- a/claims_hosp/delphi_claims_hosp/config.py +++ b/claims_hosp/delphi_claims_hosp/config.py @@ -13,8 +13,14 @@ class Config: """Static configuration variables.""" - signal_name = "smoothed_covid19_from_claims" - signal_weekday_name = "smoothed_adj_covid19_from_claims" + signal_name = { + "Covid_like": "smoothed_covid19_from_claims", + "Flu1": "smoothed_flu_from_claims", + } + signal_weekday_name = { + "Covid_like": "smoothed_adj_covid19_from_claims", + "Flu1": "smoothed_adj_flu_from_claims", + } # max number of CPUs available for pool MAX_CPU_POOL = 10 @@ -30,7 +36,7 @@ class Config: DAY_SHIFT = timedelta(days=0) # data columns - CLAIMS_COUNT_COLS = ["Denominator", "Covid_like"] + CLAIMS_COUNT_COLS = ["Denominator", "Covid_like", "Flu1"] CLAIMS_DATE_COL = "ServiceDate" FIPS_COL = "fips" DATE_COL = "timestamp" @@ -44,6 +50,7 @@ class Config: "PatCountyFIPS": str, "Denominator": float, "Covid_like": float, + "Flu1": float, "PatAgeGroup": str, "Pat HRR ID": str, } diff --git a/claims_hosp/delphi_claims_hosp/load_data.py b/claims_hosp/delphi_claims_hosp/load_data.py index 010d9d61b..564d33079 100644 --- a/claims_hosp/delphi_claims_hosp/load_data.py +++ b/claims_hosp/delphi_claims_hosp/load_data.py @@ -52,7 +52,7 @@ def load_claims_data(claims_filepath, dropdate, base_geo): return claims_data -def load_data(input_filepath, dropdate, base_geo): +def load_data(input_filepath, dropdate, base_geo, numerator_name): """ Load in claims data, and combine them. @@ -71,7 +71,7 @@ def load_data(input_filepath, dropdate, base_geo): # rename numerator and denominator data.fillna(0, inplace=True) - data["num"] = data["Covid_like"] + data["num"] = data[numerator_name] data["den"] = data["Denominator"] data = data[['num', 'den']] data.reset_index(inplace=True) diff --git a/claims_hosp/delphi_claims_hosp/run.py b/claims_hosp/delphi_claims_hosp/run.py index a9752072c..bff4d711a 100644 --- a/claims_hosp/delphi_claims_hosp/run.py +++ b/claims_hosp/delphi_claims_hosp/run.py @@ -120,30 +120,37 @@ def run_module(params): else: logger.info("Starting no weekday adj", geo_type=geo) - signal_name = Config.signal_weekday_name if weekday else Config.signal_name - if params["indicator"]["write_se"]: - assert params["indicator"]["obfuscated_prefix"] is not None, \ - "supply obfuscated prefix in params.json" - signal_name = params["indicator"]["obfuscated_prefix"] + "_" + signal_name - - logger.info("Updating signal name", signal=signal_name) - updater = ClaimsHospIndicatorUpdater( - startdate, - enddate, - dropdate, - geo, - params["indicator"]["parallel"], - weekday, - params["indicator"]["write_se"], - signal_name, - logger, - ) - updater.update_indicator( - claims_file, - params["common"]["export_dir"], - ) - max_dates.append(updater.output_dates[-1]) - n_csv_export.append(len(updater.output_dates)) + + for numerator_name in ["Covid_like", "Flu1"]: + + signal_name = ( + Config.signal_weekday_name[numerator_name] if weekday else Config.signal_name[numerator_name] + ) + if params["indicator"]["write_se"]: + assert ( + params["indicator"]["obfuscated_prefix"] is not None + ), "supply obfuscated prefix in params.json" + signal_name = params["indicator"]["obfuscated_prefix"] + "_" + signal_name + + logger.info("Updating signal name", signal=signal_name) + updater = ClaimsHospIndicatorUpdater( + startdate, + enddate, + dropdate, + geo, + params["indicator"]["parallel"], + weekday, + params["indicator"]["write_se"], + signal_name, + numerator_name, + logger, + ) + updater.update_indicator( + claims_file, + params["common"]["export_dir"], + ) + max_dates.append(updater.output_dates[-1]) + n_csv_export.append(len(updater.output_dates)) logger.info("Finished updating", geo_type=geo) # Remove all the raw files diff --git a/claims_hosp/delphi_claims_hosp/update_indicator.py b/claims_hosp/delphi_claims_hosp/update_indicator.py index 5ba8ddd22..0955c2391 100644 --- a/claims_hosp/delphi_claims_hosp/update_indicator.py +++ b/claims_hosp/delphi_claims_hosp/update_indicator.py @@ -27,7 +27,9 @@ class ClaimsHospIndicatorUpdater: # pylint: disable=too-many-instance-attributes, too-many-arguments # all variables are used - def __init__(self, startdate, enddate, dropdate, geo, parallel, weekday, write_se, signal_name, logger): + def __init__( + self, startdate, enddate, dropdate, geo, parallel, weekday, write_se, signal_name, numerator_name, logger + ): """ Initialize updater for the claims-based hospitalization indicator. @@ -45,8 +47,14 @@ def __init__(self, startdate, enddate, dropdate, geo, parallel, weekday, write_s self.startdate, self.enddate, self.dropdate = [pd.to_datetime(t) for t in (startdate, enddate, dropdate)] - self.geo, self.parallel, self.weekday, self.write_se, self.signal_name = \ - geo.lower(), parallel, weekday, write_se, signal_name + self.geo, self.parallel, self.weekday, self.write_se, self.signal_name, self.numerator_name = ( + geo.lower(), + parallel, + weekday, + write_se, + signal_name, + numerator_name, + ) # init in shift_dates, declared here for pylint self.burnindate, self.fit_dates, self.burn_in_dates, self.output_dates = \ @@ -147,7 +155,7 @@ def update_indicator(self, input_filepath, outpath): # load data base_geo = Config.HRR_COL if self.geo == Config.HRR_COL else Config.FIPS_COL - data = load_data(input_filepath, self.dropdate, base_geo) + data = load_data(input_filepath, self.dropdate, base_geo, self.numerator_name) data_frame = self.geo_reindex(data) # handle if we need to adjust by weekday diff --git a/claims_hosp/tests/test_backfill.py b/claims_hosp/tests/test_backfill.py index fcd908461..554a1b188 100644 --- a/claims_hosp/tests/test_backfill.py +++ b/claims_hosp/tests/test_backfill.py @@ -37,7 +37,7 @@ def test_store_backfill_file(self): backfill_df = pd.read_parquet(backfill_dir + "/"+ fn, engine='pyarrow') selected_columns = ['time_value', 'fips', 'state_id', - 'num', 'den', 'lag', 'issue_date'] + 'num', 'den', 'lag', 'issue_date', 'num_flu'] assert set(selected_columns) == set(backfill_df.columns) os.remove(backfill_dir + "/" + fn) diff --git a/claims_hosp/tests/test_indicator.py b/claims_hosp/tests/test_indicator.py index c4a8828a6..e3a0c2422 100644 --- a/claims_hosp/tests/test_indicator.py +++ b/claims_hosp/tests/test_indicator.py @@ -20,8 +20,8 @@ class TestLoadData: - fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips") - hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr") + fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips", "Covid_like") + hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr", "Covid_like") def test_backwards_pad(self): num0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=float).reshape(-1, 1) diff --git a/claims_hosp/tests/test_load_data.py b/claims_hosp/tests/test_load_data.py index 4a033804e..7641e3434 100644 --- a/claims_hosp/tests/test_load_data.py +++ b/claims_hosp/tests/test_load_data.py @@ -21,15 +21,15 @@ class TestLoadData: fips_claims_data = load_claims_data(DATA_FILEPATH, DROP_DATE, "fips") hrr_claims_data = load_claims_data(DATA_FILEPATH, DROP_DATE, "hrr") - fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips") - hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr") + fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips", "Covid_like") + hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr", "Covid_like") def test_base_unit(self): with pytest.raises(AssertionError): load_claims_data(DATA_FILEPATH, DROP_DATE, "foo") with pytest.raises(AssertionError): - load_data(DATA_FILEPATH, DROP_DATE, "foo") + load_data(DATA_FILEPATH, DROP_DATE, "foo", "Covid_like") def test_claims_columns(self): assert "hrr" in self.hrr_claims_data.index.names @@ -37,7 +37,7 @@ def test_claims_columns(self): assert "timestamp" in self.hrr_claims_data.index.names assert "timestamp" in self.fips_claims_data.index.names - expected_claims_columns = ["Denominator", "Covid_like"] + expected_claims_columns = ["Denominator", "Covid_like", "Flu1"] for col in expected_claims_columns: assert col in self.fips_claims_data.columns assert col in self.hrr_claims_data.columns diff --git a/claims_hosp/tests/test_update_indicator.py b/claims_hosp/tests/test_update_indicator.py index 5ca527287..7a814db9c 100644 --- a/claims_hosp/tests/test_update_indicator.py +++ b/claims_hosp/tests/test_update_indicator.py @@ -52,6 +52,7 @@ def test_shift_dates(self): self.weekday, self.write_se, Config.signal_name, + "Covid_like", TEST_LOGGER ) ## Test init @@ -74,6 +75,7 @@ def test_geo_reindex(self): self.weekday, self.write_se, Config.signal_name, + "Covid_like", TEST_LOGGER ) updater.shift_dates() @@ -93,6 +95,7 @@ def test_update_indicator(self): self.weekday, self.write_se, Config.signal_name, + "Covid_like", TEST_LOGGER ) @@ -115,6 +118,7 @@ def test_write_to_csv_results(self): self.weekday, self.write_se, Config.signal_name, + "Covid_like", TEST_LOGGER ) @@ -186,7 +190,7 @@ def test_write_to_csv_results(self): def test_write_to_csv_with_se_results(self): obfuscated_name = PARAMS["indicator"]["obfuscated_prefix"] - signal_name = obfuscated_name + "_" + Config.signal_weekday_name + signal_name = obfuscated_name + "_" + Config.signal_weekday_name["Covid_like"] updater = ClaimsHospIndicatorUpdater( "02-01-2020", "06-01-2020", @@ -196,6 +200,7 @@ def test_write_to_csv_with_se_results(self): True, True, signal_name, + "Covid_like", TEST_LOGGER ) @@ -248,6 +253,7 @@ def test_write_to_csv_wrong_results(self): self.weekday, self.write_se, Config.signal_name, + "Covid_like", TEST_LOGGER )