Skip to content

Commit 1dd18fc

Browse files
committed
wip
1 parent 3d209c8 commit 1dd18fc

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed

Diff for: get_forecast_data.r

+140
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
library(tidyverse)
2+
library(httr)
3+
library(lubridate)
4+
library(progress)
5+
6+
options(readr.show_progress = FALSE)
7+
options(readr.show_col_types = FALSE)
8+
9+
10+
# Configuration
11+
config <- list(
12+
base_url = "https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/main/model-output",
13+
forecasters = c("CMU-TimeSeries", "FluSight-baseline", "FluSight-ensemble", "FluSight-base_seasonal", "UMass-flusion"),
14+
local_storage = "data/forecasts",
15+
tracking_file = "data/download_tracking.csv"
16+
)
17+
18+
# Function to ensure directory structure exists
19+
setup_directories <- function(base_dir) {
20+
dir.create(file.path(base_dir), recursive = TRUE, showWarnings = FALSE)
21+
for (forecaster in config$forecasters) {
22+
dir.create(file.path(base_dir, forecaster), recursive = TRUE, showWarnings = FALSE)
23+
}
24+
}
25+
26+
# Function to load tracking data
27+
load_tracking_data <- function() {
28+
if (file.exists(config$tracking_file)) {
29+
read_csv(config$tracking_file)
30+
} else {
31+
tibble(
32+
forecaster = character(),
33+
filename = character(),
34+
download_date = character(),
35+
status = character()
36+
)
37+
}
38+
}
39+
40+
# Function to generate possible filenames for a date range
41+
generate_filenames <- function(start_date, end_date, forecaster) {
42+
dates <- seq(as_date(start_date), as_date(end_date), by = "week")
43+
filenames <- paste0(
44+
format(dates, "%Y-%m-%d"),
45+
"-",
46+
forecaster,
47+
".csv"
48+
)
49+
return(filenames)
50+
}
51+
52+
# Function to check if file exists on GitHub
53+
check_github_file <- function(forecaster, filename) {
54+
url <- paste0(config$base_url, "/", forecaster, "/", filename)
55+
response <- GET(url)
56+
return(status_code(response) == 200)
57+
}
58+
59+
# Function to download a single file
60+
download_forecast_file <- function(forecaster, filename) {
61+
url <- paste0(config$base_url, "/", forecaster, "/", filename)
62+
local_path <- file.path(config$local_storage, forecaster, filename)
63+
64+
tryCatch(
65+
{
66+
download.file(url, local_path, mode = "wb", quiet = TRUE)
67+
return("success")
68+
},
69+
error = function(e) {
70+
return("failed")
71+
}
72+
)
73+
}
74+
75+
# Main function to update forecast files
76+
update_forecast_files <- function(days_back = 30) {
77+
# Setup
78+
setup_directories(config$local_storage)
79+
tracking_data <- load_tracking_data()
80+
81+
# Generate date range
82+
end_date <- Sys.Date()
83+
start_date <- get_forecast_reference_date(end_date - days_back)
84+
85+
# Process each forecaster
86+
new_tracking_records <- list()
87+
88+
pb_forecasters <- progress_bar$new(
89+
format = "Downloading forecasts from :forecaster [:bar] :percent :eta",
90+
total = length(config$forecasters),
91+
clear = FALSE,
92+
width = 60
93+
)
94+
95+
for (forecaster in config$forecasters) {
96+
pb_forecasters$tick(tokens = list(forecaster = forecaster))
97+
98+
# Get potential filenames
99+
filenames <- generate_filenames(start_date, end_date, forecaster)
100+
101+
# Filter out already downloaded files
102+
existing_files <- tracking_data %>%
103+
filter(forecaster == !!forecaster, status == "success") %>%
104+
pull(filename)
105+
106+
new_files <- setdiff(filenames, existing_files)
107+
108+
if (length(new_files) > 0) {
109+
# Create nested progress bar for files
110+
pb_files <- progress_bar$new(
111+
format = " Downloading files [:bar] :current/:total :filename",
112+
total = length(new_files)
113+
)
114+
115+
for (filename in new_files) {
116+
pb_files$tick(tokens = list(filename = filename))
117+
118+
if (check_github_file(forecaster, filename)) {
119+
status <- download_forecast_file(forecaster, filename)
120+
121+
new_tracking_records[[length(new_tracking_records) + 1]] <- tibble(
122+
forecaster = forecaster,
123+
filename = filename,
124+
download_date = as.character(Sys.time()),
125+
status = status
126+
)
127+
}
128+
}
129+
}
130+
}
131+
132+
# Update tracking data
133+
if (length(new_tracking_records) > 0) {
134+
new_tracking_data <- bind_rows(new_tracking_records)
135+
tracking_data <- bind_rows(tracking_data, new_tracking_data)
136+
write_csv(tracking_data, config$tracking_file)
137+
}
138+
139+
return(tracking_data)
140+
}

0 commit comments

Comments
 (0)