Skip to content

Commit 65e343f

Browse files
committed
NaNs for CAN:
* add missingness-code columns (missing_val, missing_se, missing_sample_size), allow NaN values through
1 parent 9b75e07 commit 65e343f

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

Diff for: covid_act_now/delphi_covid_act_now/run.py

+24
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,33 @@
1010
from delphi_utils import (
1111
create_export_csv,
1212
S3ArchiveDiffer,
13+
Nans
1314
)
1415

1516
from .constants import GEO_RESOLUTIONS, SIGNALS
1617
from .geo import geo_map
1718
from .pull import load_data, extract_testing_metrics
1819

20+
def add_nancodes(df, signal):
    """Attach missingness-code columns to ``df`` and return it.

    Three columns are written in place on ``df``: ``missing_val``,
    ``missing_se`` and ``missing_sample_size``.

    * ``missing_val`` is ``Nans.NOT_MISSING`` unless ``val`` is null, in which
      case it is ``Nans.UNKNOWN``.
    * For the ``"pcr_tests_positive"`` signal, ``missing_se`` and
      ``missing_sample_size`` follow the same rule against the ``se`` and
      ``sample_size`` columns.
    * For any other signal, both are set to ``Nans.NOT_APPLICABLE`` and the
      ``se`` / ``sample_size`` columns are not inspected.

    Parameters
    ----------
    df:
        Frame with a ``val`` column (plus ``se`` and ``sample_size`` when
        ``signal`` is ``"pcr_tests_positive"``). Modified in place.
    signal:
        Signal name; only ``"pcr_tests_positive"`` is treated specially.

    Returns
    -------
    The same ``df`` object, with the three missingness columns set.
    """
    reports_uncertainty = signal == "pcr_tests_positive"
    aux_default = Nans.NOT_MISSING if reports_uncertainty else Nans.NOT_APPLICABLE

    # Defaults first; assignment order fixes the column order of the output.
    df["missing_val"] = Nans.NOT_MISSING
    df["missing_se"] = aux_default
    df["missing_sample_size"] = aux_default

    # Any null value is flagged as unknown.
    df.loc[df["val"].isnull(), "missing_val"] = Nans.UNKNOWN

    # se / sample_size are only checked for the signal that carries them.
    if reports_uncertainty:
        for column in ("se", "sample_size"):
            df.loc[df[column].isnull(), f"missing_{column}"] = Nans.UNKNOWN

    return df
39+
1940
def run_module(params):
2041
"""
2142
Run the CAN testing metrics indicator.
@@ -56,9 +77,11 @@ def run_module(params):
5677
# Perform geo aggregations and export to receiving
5778
for geo_res in GEO_RESOLUTIONS:
5879
print(f"Processing {geo_res}")
80+
# breakpoint()
5981
df = geo_map(df_county_testing, geo_res)
6082

6183
# Export 'pcr_specimen_positivity_rate'
84+
df = add_nancodes(df, "pcr_tests_positive")
6285
exported_csv_dates = create_export_csv(
6386
df,
6487
export_dir=export_dir,
@@ -69,6 +92,7 @@ def run_module(params):
6992
df["val"] = df["sample_size"]
7093
df["sample_size"] = np.nan
7194
df["se"] = np.nan
95+
df = add_nancodes(df, "pcr_tests_total")
7296
exported_csv_dates = create_export_csv(
7397
df,
7498
export_dir=export_dir,

Diff for: covid_act_now/tests/test_run.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def test_output_files(self, clean_receiving_dir):
2121
run_module(self.PARAMS)
2222
csv_files = set(listdir("receiving"))
2323
csv_files.discard(".gitignore")
24+
today = pd.Timestamp.today().date().strftime("%Y%m%d")
2425

2526
expected_files = set()
2627
for signal in SIGNALS:
@@ -30,7 +31,11 @@ def test_output_files(self, clean_receiving_dir):
3031
# All output files exist
3132
assert csv_files == expected_files
3233

34+
expected_columns = [
35+
"geo_id", "val", "se", "sample_size",
36+
"missing_val", "missing_se", "missing_sample_size"
37+
]
3338
# All output files have correct columns
3439
for csv_file in csv_files:
3540
df = pd.read_csv(join("receiving", csv_file))
36-
assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()
41+
assert (df.columns.values == expected_columns).all()

0 commit comments

Comments
 (0)