1- import  dask .dataframe  as  dd 
1+ """Module providing functions for processing and wrangling data.""" 
2+ 
23from  datetime  import  datetime 
4+ from  pathlib  import  Path 
5+ 
36import  numpy  as  np 
47import  pandas  as  pd 
5- from  pathlib  import  Path 
68
9+ import  dask .dataframe  as  dd 
710from  .config  import  Config 
811
def format_outname(prefix: str, se: bool, weekday: bool):
    """
    Build the name of the output signal for results files.

    Note: this function only constructs and returns the name; it does not
    write anything out.

    Parameters
    ----------
    prefix: obfuscated prefix prepended to the name when standard errors are
            written out; must be non-None when ``se`` is True
    se: boolean to write out standard errors, if true, use an obfuscated name
    weekday: boolean for weekday adjustments.
             signals will be generated with weekday adjustments (True) or without
             adjustments (False)

    Returns
    -------
    out_name: str

    Raises
    ------
    AssertionError
        If ``se`` is True but ``prefix`` is None.
    """
    # Weekday-adjusted signals carry a distinct base name.
    out_name = "smoothed_adj_cli" if weekday else "smoothed_cli"
    if se:
        # Standard-error output must be published under an obfuscated name.
        assert prefix is not None, "template has no obfuscated prefix"
        out_name = prefix + "_" + out_name
    return out_name
2833
2934def  format_df (df : pd .DataFrame , geo_id : str , se : bool , logger ):
30-     ''' 
31-     format dataframe and checks for anomalies to write results 
35+     """ 
36+     Format dataframe and checks for anomalies to write results. 
37+ 
3238    Parameters 
3339    ---------- 
3440    df: dataframe from output from update_sensor 
@@ -39,7 +45,7 @@ def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
3945    Returns 
4046    ------- 
4147    filtered and formatted dataframe 
42-     '''  
48+     """  
4349    # report in percentage 
4450    df ['val' ] =  df ['val' ] *  100 
4551    df ["se" ] =  df ["se" ] *  100 
@@ -66,7 +72,7 @@ def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
6672        valid_cond  =  (df ['se' ] >  0 ) &  (df ['val' ] >  0 )
6773        invalid_df  =  df [~ valid_cond ]
6874        if  len (invalid_df ) >  0 :
69-             logger .info (f "p=0, std_err=0 invalid" )
75+             logger .info ("p=0, std_err=0 invalid" )
7076        df  =  df [valid_cond ]
7177    else :
7278        df ["se" ] =  np .NAN 
@@ -76,7 +82,8 @@ def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
7682    return  df 
7783
7884def  write_to_csv (output_df : pd .DataFrame , prefix : str , geo_id : str , weekday : bool , se :bool , logger , output_path = "." ):
79-     """Write sensor values to csv. 
85+     """ 
86+     Write sensor values to csv. 
8087
8188    Args: 
8289      output_dict: dictionary containing sensor rates, se, unique dates, and unique geo_id 
@@ -106,9 +113,9 @@ def write_to_csv(output_df: pd.DataFrame, prefix: str, geo_id: str, weekday: boo
106113
107114
108115def  csv_to_df (filepath : str , startdate : datetime , enddate : datetime , dropdate : datetime , logger ) ->  pd .DataFrame :
109-     ''' 
110-     Reads  csv using Dask and  filters out based on date range and currently unused column,  
111-     then converts back into pandas dataframe. 
116+     """ 
117+     Read  csv using Dask,  filters unneeded data, then converts back into pandas dataframe.  
118+ 
112119    Parameters 
113120    ---------- 
114121      filepath: path to the aggregated doctor-visits data 
@@ -117,7 +124,7 @@ def csv_to_df(filepath: str, startdate: datetime, enddate: datetime, dropdate: d
117124      dropdate: data drop date (YYYY-mm-dd) 
118125
119126    ------- 
120-     '''  
127+     """  
121128    filepath  =  Path (filepath )
122129    logger .info (f"Processing { filepath }  " )
123130
0 commit comments