Commit dc14c71

remove saving to csv
1 parent 02de2eb commit dc14c71

File tree

1 file changed: +17 -42 lines changed

src/acquisition/rvdss/run.py

Lines changed: 17 additions & 42 deletions
@@ -11,6 +11,7 @@
 from delphi.epidata.acquisition.rvdss.utils import fetch_dashboard_data, check_most_recent_update_date,get_dashboard_update_date
 from delphi.epidata.acquisition.rvdss.constants import DASHBOARD_BASE_URL, RESP_DETECTIONS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE, COUNTS_OUTPUT_FILE,UPDATE_DATES_FILE
 from delphi.epidata.acquisition.rvdss.pull_historic import fetch_report_data,fetch_historical_dashboard_data
+from delphi.epidata.acquisition.rvdss.database import respiratory_detections_cols, pct_positive_cols, detections_counts_cols, expected_table_names, expected_columns, get_num_rows, update

 def update_current_data():

@@ -26,69 +27,43 @@ def update_current_data():
         with open(UPDATE_DATES_FILE, 'a') as testfile:
             testfile.write(update_date+ "\n")

-        ## TODO: what is the base path for these files?
-        base_path = "."

         data_dict = fetch_dashboard_data(DASHBOARD_BASE_URL)
-
-        table_types = {
-            "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
-            "positive": POSITIVE_TESTS_OUTPUT_FILE,
-            # "count": COUNTS_OUTPUT_FILE, # Dashboards don't contain this data.
-        }
-        for tt in table_types.keys():
-            data = data_dict[tt]
-
-            # Write the tables to separate csvs
-            path = base_path + "/" + table_types[tt]
-
-            # Since this function generates new data weekly, we need to combine it with the existing data, if it exists.
-            if not os.path.exists(path):
-                data.to_csv(path,index=True)
-            else:
-                old_data = pd.read_csv(path).set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
-
-                # If index already exists in the data on disk, don't add the new data -- we may have already run the weekly data fetch.
-                ## TODO: The check on index maybe should be stricter? Although we do deduplication upstream, so this probably won't find true duplicates
-                if not data.index.isin(old_data.index).any():
-                    old_data= pd.concat([old_data,data],axis=0)
-                    old_data.to_csv(path,index=True)
-
-        # ## TODO
-        # update_database(data)
+        ## TODO
+        update(data_dict)
     else:
         print("Data is already up to date")

 def update_historical_data():
-    ## TODO: what is the base path for these files?
-    base_path = "."
-
     report_dict_list = fetch_report_data() # a dict for every season, and every seasonal dict has 2/3 tables inside

     # a dict with an entry for every week that has an archival dashboard, and each entry has 2/3 tables
     dashboard_dict_list = fetch_historical_dashboard_data()
-
+
     table_types = {
-        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
-        "positive": POSITIVE_TESTS_OUTPUT_FILE,
-        "count": COUNTS_OUTPUT_FILE,
+        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
+        "positive": POSITIVE_TESTS_OUTPUT_FILE,
+        "count": COUNTS_OUTPUT_FILE
     }
+
+    hist_dict_list = {}
     for tt in table_types.keys():
         # Merge tables together from dashboards and reports for each table type.
         dashboard_data = [elem.get(tt, pd.DataFrame()) for elem in dashboard_dict_list] # a list of all the dashboard tables
         report_data = [elem.get(tt, None) for elem in report_dict_list] # a list of the report table

         all_report_tables = pd.concat(report_data)
         all_dashboard_tables = pd.concat(dashboard_data)
+
+        if all_dashboard_tables.empty == False and all_report_tables.empty == False:
+            all_dashboard_tables=all_dashboard_tables.reset_index()
+            all_dashboard_tables=all_dashboard_tables.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])

-        data = pd.concat([all_report_tables, all_dashboard_tables])
-
-        # Write the tables to separate csvs
-        if not data.empty:
-            data.to_csv(base_path +"/" + table_types[tt], index=True)
+        hist_dict_list[tt] = pd.concat([all_report_tables, all_dashboard_tables])

-        # ## TODO
-        # update_database(data)
+    #update database
+    update(hist_dict_list)
+

 def main():
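
For readability, below is a sketch of how update_historical_data() reads after this commit, reconstructed from the hunks above. The import list is trimmed to what the function needs, import pandas as pd is implied by the existing pd usage, indentation is inferred from the code structure, and the comments are editorial rather than copied from the file. In update_current_data() the change is simpler and is not repeated here: the per-table CSV read/merge/write loop is replaced by a single update(data_dict) call.

# Sketch only: reconstructed from the diff above, not copied verbatim from the repository.
import pandas as pd

from delphi.epidata.acquisition.rvdss.constants import (
    RESP_DETECTIONS_OUTPUT_FILE, POSITIVE_TESTS_OUTPUT_FILE, COUNTS_OUTPUT_FILE)
from delphi.epidata.acquisition.rvdss.pull_historic import (
    fetch_report_data, fetch_historical_dashboard_data)
from delphi.epidata.acquisition.rvdss.database import update


def update_historical_data():
    # One dict per season; each seasonal dict holds 2/3 tables.
    report_dict_list = fetch_report_data()

    # One entry per week that has an archival dashboard; each entry holds 2/3 tables.
    dashboard_dict_list = fetch_historical_dashboard_data()

    table_types = {
        "respiratory_detection": RESP_DETECTIONS_OUTPUT_FILE,
        "positive": POSITIVE_TESTS_OUTPUT_FILE,
        "count": COUNTS_OUTPUT_FILE
    }

    # Instead of writing each merged table to its own CSV, accumulate the tables
    # keyed by table type and hand the whole dict to the database layer once.
    hist_dict_list = {}
    for tt in table_types.keys():
        # Merge tables from dashboards and reports for each table type.
        dashboard_data = [elem.get(tt, pd.DataFrame()) for elem in dashboard_dict_list]
        report_data = [elem.get(tt, None) for elem in report_dict_list]

        all_report_tables = pd.concat(report_data)
        all_dashboard_tables = pd.concat(dashboard_data)

        # Put the dashboard tables on the same index as the report tables
        # before combining them.
        if all_dashboard_tables.empty == False and all_report_tables.empty == False:
            all_dashboard_tables = all_dashboard_tables.reset_index()
            all_dashboard_tables = all_dashboard_tables.set_index(
                ['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])

        hist_dict_list[tt] = pd.concat([all_report_tables, all_dashboard_tables])

    # Update database.
    update(hist_dict_list)

With this change both entry points end in a single update(...) call over a dict mapping table type to DataFrame, which appears to be the shape the new database layer expects; the *_OUTPUT_FILE constants remain only as dictionary values and are no longer used as file paths.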
