Skip to content

Commit 5863680

Browse files
nolangormleyaysim319melange396
authored
Signal Documentation Coverage Endpoint (#1584)
new `geo_coverage` endpoint with backing table and views, plus a utility to compute the table contents, as well as tests --------- Co-authored-by: Amaris Sim <[email protected]> Co-authored-by: george <[email protected]>
1 parent 828e72a commit 5863680

File tree

7 files changed

+215
-0
lines changed

7 files changed

+215
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Integration tests for the covidcast `geo_coverage` endpoint."""
2+
3+
# standard library
4+
import json
5+
import unittest
6+
7+
# third party
8+
import mysql.connector
9+
import requests
10+
11+
# first party
12+
from delphi_utils import Nans
13+
from delphi.epidata.client.delphi_epidata import Epidata
14+
import delphi.operations.secrets as secrets
15+
import delphi.epidata.acquisition.covidcast.database as live
16+
from delphi.epidata.maintenance.coverage_crossref_updater import main
17+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
18+
19+
# use the local instance of the Epidata API
20+
BASE_URL = 'http://delphi_web_epidata/epidata' # NOSONAR
21+
22+
23+
class CoverageCrossrefTests(CovidcastBase):
    """Integration tests for the coverage crossref updater and the `geo_coverage` endpoint."""

    # Seconds to wait on the API; without a timeout a stalled server would
    # hang the whole test run instead of failing this test.
    _REQUEST_TIMEOUT = 60

    def localSetUp(self):
        """Perform per-test setup."""
        # start every test from an empty crossref table
        self._db._cursor.execute('TRUNCATE TABLE `coverage_crossref`')

    @staticmethod
    def _make_request(params):
        """Query `/covidcast/geo_coverage` and return the decoded JSON body.

        Args:
            params: query-string parameters forwarded to the endpoint.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
            requests.Timeout: if no response arrives within `_REQUEST_TIMEOUT` seconds.
        """
        response = requests.get(
            f"{Epidata.BASE_URL}/covidcast/geo_coverage",
            params=params,
            auth=Epidata.auth,
            timeout=CoverageCrossrefTests._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def test_caching(self):
        """Populate, query, cache, query, and verify the cache."""

        # insert dummy data
        self._insert_rows([
            CovidcastTestRow.make_default_row(geo_type="state", geo_value="pa"),
            CovidcastTestRow.make_default_row(geo_type="state", geo_value="ny"),
            CovidcastTestRow.make_default_row(geo_type="state", geo_value="ny", signal="sig2"),
        ])

        results = self._make_request(params={'geo': 'state:*'})

        # the crossref table has not been computed yet, so nothing is served
        self.assertEqual(results, {
            'result': -2,
            'epidata': [],
            'message': 'no results',
        })

        # update the coverage crossref table
        main()

        results = self._make_request(params={'geo': 'state:*'})

        # every signal seen in any state is reported once
        self.assertEqual(results, {
            'result': 1,
            'epidata': [{'signal': 'sig', 'source': 'src'}, {'signal': 'sig2', 'source': 'src'}],
            'message': 'success',
        })

        results = self._make_request(params={'geo': 'hrr:*'})

        # no hrr rows were inserted, so there is no coverage to report
        self.assertEqual(results, {
            'result': -2,
            'epidata': [],
            'message': 'no results',
        })

        results = self._make_request(params={'geo': 'state:pa'})

        # only the signal inserted for pa is reported
        self.assertEqual(results, {
            'result': 1,
            'epidata': [{'signal': 'sig', 'source': 'src'}],
            'message': 'success',
        })

        results = self._make_request(params={'geo': 'state:ny'})

        # both signals inserted for ny are reported
        self.assertEqual(results, {
            'result': 1,
            'epidata': [{'signal': 'sig', 'source': 'src'}, {'signal': 'sig2', 'source': 'src'}],
            'message': 'success',
        })

src/acquisition/covidcast/database.py

+33
Original file line numberDiff line numberDiff line change
@@ -561,3 +561,36 @@ def retrieve_covidcast_meta_cache(self):
561561
for entry in cache:
562562
cache_hash[(entry['data_source'], entry['signal'], entry['time_type'], entry['geo_type'])] = entry
563563
return cache_hash
564+
565+
def compute_coverage_crossref(self):
    """Compute coverage_crossref table, for looking up available signals per geo or vice versa.

    Inside a single transaction, deletes every row of `coverage_crossref` and
    re-inserts one row per (signal_key_id, geo_key_id) pair found in
    `covid.epimetric_latest`, with the minimum and maximum `time_value` of
    that pair.

    Returns:
        The cursor's row count for the INSERT statement.

    Raises:
        Any exception raised while executing the statements or committing;
        the transaction is rolled back before the exception propagates.
    """

    logger = get_structured_logger("compute_coverage_crossref")

    coverage_crossref_delete_sql = '''
        DELETE FROM coverage_crossref;
    '''

    coverage_crossref_update_sql = '''
        INSERT INTO coverage_crossref (signal_key_id, geo_key_id, min_time_value, max_time_value)
            SELECT
                signal_key_id,
                geo_key_id,
                MIN(time_value) AS min_time_value,
                MAX(time_value) AS max_time_value
            FROM covid.epimetric_latest
            GROUP BY signal_key_id, geo_key_id;
    '''

    # Deliberately outside the `try`: if no transaction could be started here,
    # there is nothing of ours to roll back.
    self._connection.start_transaction()

    try:
        self._cursor.execute(coverage_crossref_delete_sql)
        logger.info("coverage_crossref_delete", rows=self._cursor.rowcount)

        self._cursor.execute(coverage_crossref_update_sql)
        # remember the INSERT's count now, so the return value cannot be
        # affected by anything that happens on the cursor afterwards
        inserted_rows = self._cursor.rowcount
        logger.info("coverage_crossref_update", rows=inserted_rows)

        self.commit()
    except Exception:
        # Without this, a failure after the DELETE would leave the deletion
        # pending in the open transaction, and a later commit on the same
        # connection would persist an emptied table.
        self._connection.rollback()
        raise

    logger.info("coverage_crossref committed")

    return inserted_rows

src/ddl/v4_schema.sql

+21
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,24 @@ CREATE TABLE `covidcast_meta_cache` (
164164
PRIMARY KEY (`timestamp`)
165165
) ENGINE=InnoDB;
166166
INSERT INTO covidcast_meta_cache VALUES (0, '[]');
167+
168+
-- One row per (signal, geo) pair, with the smallest and largest `time_value` recorded for that pair.
CREATE TABLE `coverage_crossref` (
    `signal_key_id` bigint NOT NULL,
    `geo_key_id` bigint NOT NULL,
    `min_time_value` int NOT NULL,
    `max_time_value` int NOT NULL,
    -- geo-first ordering; UNIQUE also guarantees a single row per (geo, signal) pair
    UNIQUE INDEX `coverage_crossref_geo_sig` (`geo_key_id`, `signal_key_id`),
    -- signal-first ordering of the same pair
    INDEX `coverage_crossref_sig_geo` (`signal_key_id`, `geo_key_id`)
) ENGINE=InnoDB;
176+
177+
-- `coverage_crossref` with its key ids resolved to source/signal and geo_type/geo_value
-- through the `signal_dim` and `geo_dim` tables.
CREATE OR REPLACE VIEW `coverage_crossref_v` AS
    SELECT
        `sd`.`source`,
        `sd`.`signal`,
        `gd`.`geo_type`,
        `gd`.`geo_value`,
        `cc`.`min_time_value`,
        `cc`.`max_time_value`
    FROM `coverage_crossref` AS `cc`
    INNER JOIN `signal_dim` AS `sd` USING (`signal_key_id`)
    INNER JOIN `geo_dim` AS `gd` USING (`geo_key_id`);

src/ddl/v4_schema_aliases.sql

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
CREATE VIEW `epidata`.`epimetric_full_v` AS SELECT * FROM `covid`.`epimetric_full_v`;
99
CREATE VIEW `epidata`.`epimetric_latest_v` AS SELECT * FROM `covid`.`epimetric_latest_v`;
1010
CREATE VIEW `epidata`.`covidcast_meta_cache` AS SELECT * FROM `covid`.`covidcast_meta_cache`;
11+
-- make `covid`.`coverage_crossref_v` readable under the same name in the `epidata` schema
CREATE VIEW `epidata`.`coverage_crossref_v` AS SELECT * FROM `covid`.`coverage_crossref_v`;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Updates the table for the `coverage_crossref` endpoint."""
2+
3+
import time
4+
5+
from delphi.epidata.acquisition.covidcast.database import Database
6+
from delphi_utils import get_structured_logger
7+
8+
9+
def main():
    """Recompute the `coverage_crossref` table and log the outcome.

    Connects to the database, runs `Database.compute_coverage_crossref()`,
    and disconnects whether or not the computation succeeded. An exception
    from the computation propagates to the caller after the disconnect.

    Returns:
        True once the table has been recomputed.
    """
    log = get_structured_logger("coverage_crossref_updater")
    started_at = time.time()

    db = Database()
    db.connect()

    try:
        coverage = db.compute_coverage_crossref()
    finally:
        # runs on success and on failure alike
        # NOTE(review): `True` presumably asks disconnect() to commit — confirm against Database.disconnect
        db.disconnect(True)

    log.info(f"coverage_crossref returned: {coverage}")

    elapsed_seconds = round(time.time() - started_at, 2)
    log.info(
        "Generated and updated covidcast geo/signal coverage",
        total_runtime_in_seconds=elapsed_seconds,
    )
    return True


if __name__ == '__main__':
    main()

src/server/endpoints/covidcast.py

+18
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,24 @@ def transform_row(row, proxy):
542542

543543
return execute_query(q.query, q.params, fields_string, fields_int, [], transform=transform_row)
544544

545+
@bp.route("/geo_coverage", methods=("GET", "POST"))
def handle_geo_coverage():
    """
    List the distinct (source, signal) pairs that have coverage for the requested geos.

    The geo filter comes from `parse_geo_sets()`; rows are read from the
    `coverage_crossref_v` view, sorted by source and then signal.
    """
    geo_sets = parse_geo_sets()

    fields_string = ["source", "signal"]
    builder = QueryBuilder("coverage_crossref_v", "c")
    builder.set_fields(fields_string)
    builder.apply_geo_filters("geo_type", "geo_value", geo_sets)
    builder.set_sort_order("source", "signal")
    # grouping on the selected columns collapses duplicates, much like `SELECT DISTINCT`
    builder.group_by = [f"c.{field}" for field in fields_string]

    return execute_query(builder.query, builder.params, fields_string, [], [])
545563

546564
@bp.route("/anomalies", methods=("GET", "POST"))
547565
def handle_anomalies():

tests/acquisition/covidcast/test_database.py

+16
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,22 @@ def test_update_covidcast_meta_cache_query(self):
7878
self.assertIn('timestamp', sql)
7979
self.assertIn('epidata', sql)
8080

81+
def test_compute_coverage_crossref_query(self):
    """Query to update the coverage crossref looks sensible.

    Checks that SQL was executed on the mocked cursor and that it mentions
    both the table being rebuilt and the table it is rebuilt from.

    NOTE: Actual behavior is tested by integration test.
    """

    mock_connector = MagicMock()
    database = Database()
    database.connect(connector_impl=mock_connector)

    database.compute_coverage_crossref()

    # MagicMock hands back the same child mocks on every call, so these are
    # the connection and cursor the Database object used
    connection = mock_connector.connect()
    cursor = connection.cursor()
    self.assertTrue(cursor.execute.called)

    # gather every positional argument passed to execute() so the check does
    # not depend on which statement happened to run last
    executed_sql = ' '.join(
        str(arg).lower()
        for recorded in cursor.execute.call_args_list
        for arg in recorded[0]
    )
    self.assertIn('coverage_crossref', executed_sql)
    self.assertIn('epimetric_latest', executed_sql)
96+
8197
def test_insert_or_update_batch_exception_reraised(self):
8298
"""Test that an exception is reraised"""
8399
mock_connector = MagicMock()

0 commit comments

Comments
 (0)