diff --git a/nhsn/delphi_nhsn/constants.py b/nhsn/delphi_nhsn/constants.py index da6216322..caa91746e 100644 --- a/nhsn/delphi_nhsn/constants.py +++ b/nhsn/delphi_nhsn/constants.py @@ -1,5 +1,7 @@ """Registry for signal names.""" +from datetime import timedelta + GEOS = ["state", "nation", "hhs"] MAIN_DATASET_ID = "ua7e-t2fy" @@ -62,3 +64,5 @@ f"{NUM_HOSP_REPORTING_FLU}_prelim": float, f"{NUM_HOSP_REPORTING_RSV}_prelim": float, } + +RECENTLY_UPDATED_DIFF = timedelta(days=1) diff --git a/nhsn/delphi_nhsn/pull.py b/nhsn/delphi_nhsn/pull.py index 31164770e..69e6d56fd 100644 --- a/nhsn/delphi_nhsn/pull.py +++ b/nhsn/delphi_nhsn/pull.py @@ -4,16 +4,25 @@ import logging import random import time -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Optional from urllib.error import HTTPError import pandas as pd +from delphi_epidata import Epidata from delphi_utils import create_backup_csv from sodapy import Socrata -from .constants import MAIN_DATASET_ID, PRELIM_DATASET_ID, PRELIM_SIGNALS_MAP, PRELIM_TYPE_DICT, SIGNALS_MAP, TYPE_DICT +from .constants import ( + MAIN_DATASET_ID, + PRELIM_DATASET_ID, + PRELIM_SIGNALS_MAP, + PRELIM_TYPE_DICT, + RECENTLY_UPDATED_DIFF, + SIGNALS_MAP, + TYPE_DICT, +) def check_last_updated(socrata_token, dataset_id, logger): @@ -38,17 +47,31 @@ def check_last_updated(socrata_token, dataset_id, logger): client = Socrata("data.cdc.gov", socrata_token) response = client.get_metadata(dataset_id) - updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"])) - now = datetime.utcnow() - recently_updated_source = (now - updated_timestamp) < timedelta(days=1) + updated_timestamp = datetime.fromtimestamp(int(response["rowsUpdatedAt"]), tz=timezone.utc) + + # pulling last updated from the api + meta_df = pd.DataFrame(Epidata.covidcast_meta()["epidata"]) + signal_suffix = "prelim" if dataset_id == PRELIM_DATASET_ID else "ew" + nhsn_meta_df = meta_df[(meta_df["data_source"] == "nhsn") & (meta_df["signal"].str.endswith(signal_suffix))] + est = timezone(timedelta(hours=-5)) + last_updated = datetime.fromtimestamp(nhsn_meta_df["last_update"].min(), tz=est) + + # currently set to run twice a week, RECENTLY_UPDATED_DIFF may need adjusting based on the cadence + recently_updated_source = (updated_timestamp - last_updated) > RECENTLY_UPDATED_DIFF prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else "" if recently_updated_source: logger.info( - f"{prelim_prefix}NHSN data was recently updated; Pulling data", updated_timestamp=updated_timestamp + f"{prelim_prefix}NHSN data was recently updated; Pulling data", + updated_timestamp=updated_timestamp, + metadata_timestamp=last_updated, ) else: - logger.info(f"{prelim_prefix}NHSN data is stale; Skipping", updated_timestamp=updated_timestamp) + logger.info( + f"{prelim_prefix}NHSN data is stale; Skipping", + updated_timestamp=updated_timestamp, + metadata_timestamp=last_updated, + ) # pylint: disable=W0703 except Exception as e: logger.info("error while processing socrata metadata; treating data as stale", error=str(e)) diff --git a/nhsn/tests/conftest.py b/nhsn/tests/conftest.py index b321f1236..5d0f49ce8 100644 --- a/nhsn/tests/conftest.py +++ b/nhsn/tests/conftest.py @@ -23,6 +23,11 @@ with open(f"{TEST_DIR}/test_data/prelim_page.json", "r") as f: PRELIM_TEST_DATA = json.load(f) +# filtered metadata (just includes nhsn meta) +with open(f"{TEST_DIR}/test_data/covidcast_meta.json", "r") as f: + COVID_META_DATA = json.load(f) + + @pytest.fixture(scope="session") def params(): params = { @@ -62,7 +67,8 @@ def params_w_patch(params): @pytest.fixture(scope="function") def run_as_module(params): with patch('sodapy.Socrata.get') as mock_get, \ - patch('sodapy.Socrata.get_metadata') as mock_get_metadata: + patch('sodapy.Socrata.get_metadata') as mock_get_metadata, \ + patch('delphi_nhsn.pull.Epidata.covidcast_meta') as mock_covidcast_meta: def side_effect(*args, **kwargs): if kwargs['offset'] == 0: if "ua7e-t2fy" in args[0]: @@ -73,5 +79,6 @@ def side_effect(*args, **kwargs): return [] mock_get.side_effect = side_effect mock_get_metadata.return_value = {"rowsUpdatedAt": time.time()} + mock_covidcast_meta.return_value = COVID_META_DATA run_module(params) diff --git a/nhsn/tests/test_data/covidcast_meta.json b/nhsn/tests/test_data/covidcast_meta.json new file mode 100644 index 000000000..998d3e820 --- /dev/null +++ b/nhsn/tests/test_data/covidcast_meta.json @@ -0,0 +1,619 @@ +{ + "epidata": + [ + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 41214.0, + "mean_value": 3231.6440329, + "min_lag": 2, + "min_time": 202032, + "min_value": 17.0, + "num_locations": 10, + "signal": "confirmed_admissions_covid_ew", + "stdev_value": 4663.7857901, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 156269.0, + "mean_value": 32316.4403292, + "min_lag": 2, + "min_time": 202032, + "min_value": 1972.0, + "num_locations": 1, + "signal": "confirmed_admissions_covid_ew", + "stdev_value": 29747.0960788, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 17256.0, + "mean_value": 589.2470173, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_covid_ew", + "stdev_value": 1207.2145303, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 11524.0, + "mean_value": 445.562963, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_flu_ew", + "stdev_value": 1089.5293067, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 54353.0, + "mean_value": 4455.6296296, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_flu_ew", + "stdev_value": 8916.7719395, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 4691.0, + "mean_value": 84.3303996, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_flu_ew", + "stdev_value": 273.1056648, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202512, + "max_lag": 244, + "max_time": 202513, + "max_value": 11524.0, + "mean_value": 446.5401639, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_flu_ew_prelim", + "stdev_value": 1087.8572053, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202512, + "max_lag": 244, + "max_time": 202514, + "max_value": 54374.0, + "mean_value": 4465.4016393, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_flu_ew_prelim", + "stdev_value": 8902.0785394, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202512, + "max_lag": 244, + "max_time": 202514, + "max_value": 4691.0, + "mean_value": 84.5011633, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_flu_ew_prelim", + "stdev_value": 272.713479, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 3386.0, + "mean_value": 67.7148148, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_rsv_ew", + "stdev_value": 276.3491733, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 15402.0, + "mean_value": 2109.5769231, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_rsv_ew", + "stdev_value": 3701.788104, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1286.0, + "mean_value": 53.1310946, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_rsv_ew", + "stdev_value": 131.5669637, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 3386.0, + "mean_value": 68.6311475, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_rsv_ew_prelim", + "stdev_value": 276.3551187, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 15402.0, + "mean_value": 2119.7468354, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_rsv_ew_prelim", + "stdev_value": 3680.5022272, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1286.0, + "mean_value": 53.1281726, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_rsv_ew_prelim", + "stdev_value": 130.6219895, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1037.0, + "mean_value": 502.2559671, + "min_lag": 2, + "min_time": 202032, + "min_value": 15.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_covid_ew", + "stdev_value": 292.6534298, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 5462.0, + "mean_value": 5022.5596708, + "min_lag": 2, + "min_time": 202032, + "min_value": 966.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_covid_ew", + "stdev_value": 1032.3401836, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 495.0, + "mean_value": 89.6951569, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_covid_ew", + "stdev_value": 85.6518225, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1037.0, + "mean_value": 502.2758197, + "min_lag": 1, + "min_time": 202032, + "min_value": 15.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_covid_ew_prelim", + "stdev_value": 292.5640713, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743600336, + "max_issue": 202515, + "max_lag": 244, + "max_time": 202514, + "max_value": 5462.0, + "mean_value": 5022.7581967, + "min_lag": 1, + "min_time": 202032, + "min_value": 976.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_covid_ew_prelim", + "stdev_value": 1030.2748494, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 495.0, + "mean_value": 89.6921107, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_covid_ew_prelim", + "stdev_value": 85.6365269, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1037.0, + "mean_value": 461.3489712, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_flu_ew", + "stdev_value": 295.9522902, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 5461.0, + "mean_value": 4613.4897119, + "min_lag": 2, + "min_time": 202032, + "min_value": 5.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_flu_ew", + "stdev_value": 1419.4719106, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 495.0, + "mean_value": 82.3897994, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_flu_ew", + "stdev_value": 82.9412078, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 41214.0, + "mean_value": 3221.1389344, + "min_lag": 1, + "min_time": 202032, + "min_value": 17.0, + "num_locations": 10, + "signal": "confirmed_admissions_covid_ew_prelim", + "stdev_value": 4657.1833354, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 156269.0, + "mean_value": 32211.3893443, + "min_lag": 1, + "min_time": 202032, + "min_value": 1979.0, + "num_locations": 1, + "signal": "confirmed_admissions_covid_ew_prelim", + "stdev_value": 29731.3030329, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 17256.0, + "mean_value": 587.3246899, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_covid_ew_prelim", + "stdev_value": 1205.1369519, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1037.0, + "mean_value": 461.5364754, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_flu_ew_prelim", + "stdev_value": 295.8647556, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 5461.0, + "mean_value": 4615.3647541, + "min_lag": 1, + "min_time": 202032, + "min_value": 5.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_flu_ew_prelim", + "stdev_value": 1416.8963319, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 495.0, + "mean_value": 82.4172278, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_flu_ew_prelim", + "stdev_value": 82.9379752, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1037.0, + "mean_value": 56.6061728, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_rsv_ew", + "stdev_value": 173.6300832, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 5383.0, + "mean_value": 566.0617284, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_rsv_ew", + "stdev_value": 1494.1846698, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 472.0, + "mean_value": 10.108988, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_rsv_ew", + "stdev_value": 38.2887186, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1037.0, + "mean_value": 58.452459, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_rsv_ew_prelim", + "stdev_value": 176.4958952, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743600336, + "max_issue": 202515, + "max_lag": 244, + "max_time": 202514, + "max_value": 5383.0, + "mean_value": 584.5245902, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_rsv_ew_prelim", + "stdev_value": 1518.6735056, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 472.0, + "mean_value": 10.4379391, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_rsv_ew_prelim", + "stdev_value": 38.9065954, + "time_type": "week" + } + ], + "message": "success", + "result": 1 +} \ No newline at end of file diff --git a/nhsn/tests/test_pull.py b/nhsn/tests/test_pull.py index f8e27868b..e77b8e622 100644 --- a/nhsn/tests/test_pull.py +++ b/nhsn/tests/test_pull.py @@ -1,5 +1,6 @@ import glob import time +from datetime import datetime, timedelta from unittest.mock import patch, MagicMock import os import pytest @@ -12,10 +13,10 @@ pull_data_from_file, check_last_updated ) -from delphi_nhsn.constants import TYPE_DICT, PRELIM_TYPE_DICT, PRELIM_DATASET_ID, MAIN_DATASET_ID +from delphi_nhsn.constants import TYPE_DICT, PRELIM_TYPE_DICT, PRELIM_DATASET_ID, MAIN_DATASET_ID, RECENTLY_UPDATED_DIFF from delphi_utils import get_structured_logger -from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR +from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR, COVID_META_DATA DATASETS = [{"id":MAIN_DATASET_ID, "test_data": TEST_DATA, @@ -79,14 +80,16 @@ def test_pull_from_file(self, caplog, dataset, params_w_patch): @patch("delphi_nhsn.pull.Socrata") @patch("delphi_nhsn.pull.create_backup_csv") + @patch("delphi_nhsn.pull.Epidata.covidcast_meta") @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) - def test_pull_nhsn_data_output(self, mock_create_backup, mock_socrata, dataset, caplog, params): + def test_pull_nhsn_data_output(self, mock_covidcast_meta, mock_create_backup, mock_socrata, dataset, caplog, params): now = time.time() # Mock Socrata client and its get method mock_client = MagicMock() mock_socrata.return_value = mock_client mock_client.get.side_effect = [dataset["test_data"],[]] mock_client.get_metadata.return_value = {"rowsUpdatedAt": now} + mock_covidcast_meta.return_value = COVID_META_DATA backup_dir = params["common"]["backup_dir"] test_token = params["indicator"]["socrata_token"] @@ -158,21 +161,34 @@ def test_pull_nhsn_data_backup(self, mock_socrata, dataset, caplog, params): @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) - @pytest.mark.parametrize("updatedAt", [time.time(), time.time() - 172800], ids=["updated", "stale"]) + @pytest.mark.parametrize("updatedAt", [datetime(year=2025, month=4, day=4, hour=12, minute=30), + # called off-cycle (checks for main update on wednesday, but updates on friday) + datetime(year=2025, month=3, day=28, hour=12, minute=30), + # called off-cycle (checks for main update on wednesday, but the update got skipped) + datetime(year=2025, month=4, day=4, hour=13, minute=30), + ], ids=["updated", "stale", "updated_late"]) @patch("delphi_nhsn.pull.Socrata") - def test_check_last_updated(self, mock_socrata, dataset, updatedAt, caplog): + @patch("delphi_nhsn.pull.Epidata.covidcast_meta") + def test_check_last_updated(self, mock_covidcast_meta, mock_socrata, dataset, updatedAt, caplog): mock_client = MagicMock() mock_socrata.return_value = mock_client - mock_client.get_metadata.return_value = {"rowsUpdatedAt": updatedAt } - logger = get_structured_logger() + mock_covidcast_meta.return_value = COVID_META_DATA + + # preliminary data is updated on wednesdays + if dataset["prelim_flag"]: + updatedAt = updatedAt - timedelta(days=2) + mock_client.get_metadata.return_value = {"rowsUpdatedAt": updatedAt.timestamp()} + logger = get_structured_logger() check_last_updated(mock_client, dataset["id"], logger) # Check that get method was called with correct arguments - now = time.time() - if now - updatedAt < 60: + last_updated = datetime(2025, 3, 28, 13, 25, 36) + if (updatedAt - last_updated) > RECENTLY_UPDATED_DIFF: assert f"{dataset['msg_prefix']}NHSN data was recently updated; Pulling data" in caplog.text else: stale_msg = f"{dataset['msg_prefix']}NHSN data is stale; Skipping" assert stale_msg in caplog.text + +