Commit dc14c71

remove saving to csv
1 parent 02de2eb commit dc14c71

File tree

1 file changed: +17 -42 lines changed

src/acquisition/rvdss/run.py

Lines changed: 17 additions & 42 deletions
@@ -11,6 +11,7 @@
 from delphi.epidata.acquisition.rvdss.utils import fetch_dashboard_data, check_most_recent_update_date,get_dashboard_update_date
 from delphi.epidata.acquisition.rvdss.constants import DASHBOARD_BASE_URL, RESP_DETECTIONS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE, COUNTS_OUTPUT_FILE,UPDATE_DATES_FILE
 from delphi.epidata.acquisition.rvdss.pull_historic import fetch_report_data,fetch_historical_dashboard_data
+from delphi.epidata.acquisition.rvdss.database import respiratory_detections_cols, pct_positive_cols, detections_counts_cols, expected_table_names, expected_columns, get_num_rows, update

 def update_current_data():

@@ -26,69 +27,43 @@ def update_current_data():
         with open(UPDATE_DATES_FILE, 'a') as testfile:
             testfile.write(update_date+ "\n")

-        ## TODO: what is the base path for these files?
-        base_path = "."

         data_dict = fetch_dashboard_data(DASHBOARD_BASE_URL)
-
-        table_types = {
-            "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
-            "positive": POSITIVE_TESTS_OUTPUT_FILE,
-            # "count": COUNTS_OUTPUT_FILE, # Dashboards don't contain this data.
-        }
-        for tt in table_types.keys():
-            data = data_dict[tt]
-
-            # Write the tables to separate csvs
-            path = base_path + "/" + table_types[tt]
-
-            # Since this function generates new data weekly, we need to combine it with the existing data, if it exists.
-            if not os.path.exists(path):
-                data.to_csv(path,index=True)
-            else:
-                old_data = pd.read_csv(path).set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
-
-                # If index already exists in the data on disk, don't add the new data -- we may have already run the weekly data fetch.
-                ## TODO: The check on index maybe should be stricter? Although we do deduplication upstream, so this probably won't find true duplicates
-                if not data.index.isin(old_data.index).any():
-                    old_data= pd.concat([old_data,data],axis=0)
-                    old_data.to_csv(path,index=True)
-
-        # ## TODO
-        # update_database(data)
+        ## TODO
+        update(data_dict)
     else:
         print("Data is already up to date")

 def update_historical_data():
-    ## TODO: what is the base path for these files?
-    base_path = "."
-
     report_dict_list = fetch_report_data() # a dict for every season, and every seasonal dict has 2/3 tables inside

     # a dict with an entry for every week that has an archival dashboard, and each entry has 2/3 tables
     dashboard_dict_list = fetch_historical_dashboard_data()
-
+
     table_types = {
-        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
-        "positive": POSITIVE_TESTS_OUTPUT_FILE,
-        "count": COUNTS_OUTPUT_FILE,
+        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
+        "positive": POSITIVE_TESTS_OUTPUT_FILE,
+        "count": COUNTS_OUTPUT_FILE
     }
+
+    hist_dict_list = {}
     for tt in table_types.keys():
         # Merge tables together from dashboards and reports for each table type.
         dashboard_data = [elem.get(tt, pd.DataFrame()) for elem in dashboard_dict_list] # a list of all the dashboard tables
         report_data = [elem.get(tt, None) for elem in report_dict_list] # a list of the report table

         all_report_tables = pd.concat(report_data)
         all_dashboard_tables = pd.concat(dashboard_data)
+
+        if all_dashboard_tables.empty == False and all_report_tables.empty == False:
+            all_dashboard_tables=all_dashboard_tables.reset_index()
+            all_dashboard_tables=all_dashboard_tables.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])

-        data = pd.concat([all_report_tables, all_dashboard_tables])
-
-        # Write the tables to separate csvs
-        if not data.empty:
-            data.to_csv(base_path +"/" + table_types[tt], index=True)
+        hist_dict_list[tt] = pd.concat([all_report_tables, all_dashboard_tables])

-        # ## TODO
-        # update_database(data)
+    #update database
+    update(hist_dict_list)
+

 def main():
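
For readability, below is a sketch of how update_historical_data() reads after this commit, reconstructed from the hunks above. The import list is trimmed to what the function needs, import pandas as pd is implied by the existing pd usage, indentation is inferred from the code structure, and the comments are editorial rather than copied from the file. In update_current_data() the change is simpler and is not repeated here: the per-table CSV read/merge/write loop is replaced by a single update(data_dict) call.

# Sketch only: reconstructed from the diff above, not copied verbatim from the repository.
import pandas as pd

from delphi.epidata.acquisition.rvdss.constants import (
    RESP_DETECTIONS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE, COUNTS_OUTPUT_FILE)
from delphi.epidata.acquisition.rvdss.pull_historic import (
    fetch_report_data, fetch_historical_dashboard_data)
from delphi.epidata.acquisition.rvdss.database import update


def update_historical_data():
    # One dict per season; each seasonal dict holds 2/3 tables.
    report_dict_list = fetch_report_data()

    # One entry per week that has an archival dashboard; each entry holds 2/3 tables.
    dashboard_dict_list = fetch_historical_dashboard_data()

    table_types = {
        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
        "positive": POSITIVE_TESTS_OUTPUT_FILE,
        "count": COUNTS_OUTPUT_FILE
    }

    # Instead of writing each merged table to its own CSV, accumulate the tables
    # keyed by table type and hand the whole dict to the database layer once.
    hist_dict_list = {}
    for tt in table_types.keys():
        # Merge tables from dashboards and reports for each table type.
        dashboard_data = [elem.get(tt, pd.DataFrame()) for elem in dashboard_dict_list]
        report_data = [elem.get(tt, None) for elem in report_dict_list]

        all_report_tables = pd.concat(report_data)
        all_dashboard_tables = pd.concat(dashboard_data)

        # Put the dashboard tables on the same index as the report tables
        # before combining them.
        if all_dashboard_tables.empty == False and all_report_tables.empty == False:
            all_dashboard_tables = all_dashboard_tables.reset_index()
            all_dashboard_tables = all_dashboard_tables.set_index(
                ['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])

        hist_dict_list[tt] = pd.concat([all_report_tables, all_dashboard_tables])

    # Update database.
    update(hist_dict_list)

With this change both entry points end in a single update(...) call over a dict mapping table type to DataFrame, which appears to be the shape the new database layer expects; the *_OUTPUT_FILE constants remain only as dictionary values and are no longer used as file paths.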
