10
10
import covidcast
11
11
12
12
import numpy as np
13
+ import pandas as pd
13
14
from delphi_utils import (
14
15
create_export_csv ,
15
16
geomap ,
16
- get_structured_logger
17
+ get_structured_logger ,
18
+ Nans
17
19
)
18
20
19
21
from .constants import (METRICS , COMBINED_METRIC ,
22
24
from .pull import pull_gs_data
23
25
24
26
27
def add_nancodes(df, smoother):
    """Add missingness-code columns to the dataframe.

    Three columns are written: ``missing_val``, ``missing_se`` and
    ``missing_sample_size``. ``missing_se`` and ``missing_sample_size`` are
    always ``Nans.NOT_APPLICABLE``. ``missing_val`` starts as
    ``Nans.NOT_MISSING`` and is then overridden as described below.

    Parameters
    ----------
    df: pd.DataFrame
        Frame with a ``val`` column. The slicing below uses a two-level
        index whose first level supports adding a ``pd.Timedelta``
        (presumably (timestamp, geo_id) -- confirm against the caller).
        The frame is modified in place (columns added; sorted by index
        when ``smoother == "smoothed"``).
    smoother: str
        Name of the smoother; only the value ``"smoothed"`` triggers the
        early-window marking.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object, with the three missingness columns added.
    """
    # Default missingness codes
    df["missing_val"] = Nans.NOT_MISSING
    df["missing_se"] = Nans.NOT_APPLICABLE
    df["missing_sample_size"] = Nans.NOT_APPLICABLE

    # Mark early smoothing entries as data insufficient.
    # An empty frame has no minimum index tuple to subscript, so the
    # early-window marking is skipped instead of raising.
    if smoother == "smoothed" and not df.empty:
        # Label slicing on a MultiIndex requires a sorted index.
        df.sort_index(inplace=True)
        # .loc label slices include the end point, so this covers the
        # earliest time value through earliest + 5 days.
        # NOTE(review): the 5-day offset presumably mirrors the smoothing
        # window -- confirm against SMOOTHERS_MAP.
        min_time_value = df.index.min()[0] + pd.Timedelta(days=5)
        early_rows = pd.IndexSlice[:min_time_value, :]
        df.loc[early_rows, "missing_val"] = Nans.PRIVACY

    # Mark any remaining nans with unknown
    remaining_nans_mask = df["val"].isnull() & df["missing_val"].eq(Nans.NOT_MISSING)
    df.loc[remaining_nans_mask, "missing_val"] = Nans.UNKNOWN
    return df
46
+
25
47
def run_module (params ):
26
48
"""
27
49
Run Google Symptoms module.
@@ -92,8 +114,7 @@ def run_module(params):
92
114
).transform (SMOOTHERS_MAP [smoother ][0 ])
93
115
df ["se" ] = np .nan
94
116
df ["sample_size" ] = np .nan
95
- # Drop early entries where data insufficient for smoothing
96
- df = df .loc [~ df ["val" ].isnull (), :]
117
+ df = add_nancodes (df , smoother )
97
118
df = df .reset_index ()
98
119
sensor_name = "_" .join ([smoother , "search" ])
99
120
@@ -105,7 +126,8 @@ def run_module(params):
105
126
start_date = SMOOTHERS_MAP [smoother ][1 ](export_start_date ),
106
127
metric = metric .lower (),
107
128
geo_res = geo_res ,
108
- sensor = sensor_name )
129
+ sensor = sensor_name ,
130
+ logger = logger )
109
131
110
132
if not exported_csv_dates .empty :
111
133
csv_export_count += exported_csv_dates .size
0 commit comments