10
10
import covidcast
11
11
12
12
import numpy as np
13
- from pandas import to_datetime
13
+ import pandas as pd
14
14
from delphi_utils import (
15
15
create_export_csv ,
16
16
geomap ,
17
- get_structured_logger
17
+ get_structured_logger ,
18
+ Nans
18
19
)
19
20
from delphi_utils .validator .utils import lag_converter
20
21
24
25
from .pull import pull_gs_data
25
26
26
27
28
def add_nancodes(df, smoother):
    """Add missingness-code columns to the dataframe.

    Three columns are written: ``missing_val`` (default ``Nans.NOT_MISSING``),
    ``missing_se`` and ``missing_sample_size`` (both ``Nans.NOT_APPLICABLE``).

    When ``smoother`` is ``"smoothed"``, the index is sorted and every row whose
    first index level lies between the earliest entry and five days after it
    (both ends included, because label slices are inclusive) gets
    ``missing_val = Nans.CENSORED``. Any row whose ``val`` is still null while
    its code is ``Nans.NOT_MISSING`` is then marked ``Nans.OTHER``.

    Parameters
    ----------
    df: pd.DataFrame
        Frame with a ``val`` column. The smoothed branch indexes it as a
        two-level index whose first level accepts a ``pd.Timedelta`` offset
        (presumably time value, then geo id -- confirm against the caller).
    smoother: str
        Smoother name; only the exact value ``"smoothed"`` triggers censoring.

    Returns
    -------
    pd.DataFrame
        The same object that was passed in; it is modified in place (new
        columns, and a sorted index in the smoothed case).
    """
    # Default missingness codes
    df["missing_val"] = Nans.NOT_MISSING
    df["missing_se"] = Nans.NOT_APPLICABLE
    df["missing_sample_size"] = Nans.NOT_APPLICABLE

    # Mark early smoothing entries as data insufficient. An empty frame has no
    # earliest index entry to anchor the window on, and indexing the result of
    # ``df.index.min()`` would raise, so there is nothing to mark in that case.
    if smoother == "smoothed" and len(df.index) > 0:
        idx = pd.IndexSlice
        # Label-based slicing on a multi-level index needs a sorted index.
        df.sort_index(inplace=True)
        min_time_value = df.index.min()[0] + 5 * pd.Timedelta(days=1)
        df.loc[idx[:min_time_value, :], "missing_val"] = Nans.CENSORED

    # Mark any remaining nans with unknown
    remaining_nans_mask = df["val"].isnull() & df["missing_val"].eq(Nans.NOT_MISSING)
    df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER
    return df
47
+
27
48
def run_module (params ):
28
49
"""
29
50
Run Google Symptoms module.
@@ -71,7 +92,7 @@ def run_module(params):
71
92
# Select the larger number of days. Prevents validator from complaining about missing dates,
72
93
# and backfills in case of an outage.
73
94
num_export_days = max (
74
- (datetime .today () - to_datetime (min (gs_metadata .max_time ))).days + 1 ,
95
+ (datetime .today () - pd . to_datetime (min (gs_metadata .max_time ))).days + 1 ,
75
96
params ["validation" ]["common" ].get ("span_length" , 14 ) + global_max_expected_lag
76
97
)
77
98
@@ -108,8 +129,7 @@ def run_module(params):
108
129
).transform (SMOOTHERS_MAP [smoother ][0 ])
109
130
df ["se" ] = np .nan
110
131
df ["sample_size" ] = np .nan
111
- # Drop early entries where data insufficient for smoothing
112
- df = df .loc [~ df ["val" ].isnull (), :]
132
+ df = add_nancodes (df , smoother )
113
133
df = df .reset_index ()
114
134
sensor_name = "_" .join ([smoother , "search" ])
115
135
@@ -121,7 +141,9 @@ def run_module(params):
121
141
start_date = SMOOTHERS_MAP [smoother ][1 ](export_start_date ),
122
142
metric = metric .lower (),
123
143
geo_res = geo_res ,
124
- sensor = sensor_name )
144
+ sensor = sensor_name ,
145
+ logger = logger )
146
+
125
147
if not exported_csv_dates .empty :
126
148
logger .info ("Exported CSV" ,
127
149
csv_export_count = exported_csv_dates .size ,
0 commit comments