Skip to content

Commit 5fca863

Browse files
authored
Merge pull request #202 from cmu-delphi/ds/format
lint: format repo with air
2 parents f55cb00 + eb567fe commit 5fca863

39 files changed

+1162
-717
lines changed

.git-blame-ignore-revs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
f49bc9845a78c138f9816335c2c05febd4630ada

R/aux_data_utils.R

Lines changed: 80 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@ convert_epiweek_to_season <- function(epiyear, epiweek) {
88
}
99

1010
epiweeks_in_year <- function(year) {
11-
last_week_of_year <- seq.Date(as.Date(paste0(year, "-12-24")),
12-
as.Date(paste0(year, "-12-31")),
13-
by = 1
14-
)
11+
last_week_of_year <- seq.Date(as.Date(paste0(year, "-12-24")), as.Date(paste0(year, "-12-31")), by = 1)
1512
return(max(as.numeric(MMWRweek::MMWRweek(last_week_of_year)$MMWRweek)))
1613
}
1714

@@ -73,17 +70,20 @@ step_season_week_sine <- function(preproc, season = 35) {
7370
#' but for now it's not worth the time
7471
#' @param original_dataset tibble or epi_df, should have states as 2 letter lower case
7572
add_pop_and_density <-
76-
function(original_dataset,
77-
apportion_filename = here::here("aux_data", "flusion_data", "apportionment.csv"),
78-
state_code_filename = here::here("aux_data", "flusion_data", "state_codes_table.csv"),
79-
hhs_code_filename = here::here("aux_data", "flusion_data", "state_code_hhs_table.csv")) {
73+
function(
74+
original_dataset,
75+
apportion_filename = here::here("aux_data", "flusion_data", "apportionment.csv"),
76+
state_code_filename = here::here("aux_data", "flusion_data", "state_codes_table.csv"),
77+
hhs_code_filename = here::here("aux_data", "flusion_data", "state_code_hhs_table.csv")
78+
) {
8079
pops_by_state_hhs <- gen_pop_and_density_data(apportion_filename, state_code_filename, hhs_code_filename)
8180
# if the dataset uses "usa" instead of "us", substitute that
8281
if ("usa" %in% unique(original_dataset)$geo_value) {
8382
pops_by_state_hhs %<>%
8483
mutate(
8584
geo_value = ifelse(geo_value == "us", "usa", geo_value),
86-
agg_level = ifelse(grepl("[0-9]{2}", geo_value),
85+
agg_level = ifelse(
86+
grepl("[0-9]{2}", geo_value),
8787
"hhs_region",
8888
ifelse(("us" == geo_value) | ("usa" == geo_value), "nation", "state")
8989
)
@@ -107,17 +107,21 @@ add_pop_and_density <-
107107

108108
add_agg_level <- function(data) {
109109
data %>%
110-
mutate(agg_level = case_when(
111-
grepl("[0-9]{2}", geo_value) ~ "hhs_region",
112-
geo_value %in% c("us", "usa") ~ "nation",
113-
.default = "state"
114-
))
110+
mutate(
111+
agg_level = case_when(
112+
grepl("[0-9]{2}", geo_value) ~ "hhs_region",
113+
geo_value %in% c("us", "usa") ~ "nation",
114+
.default = "state"
115+
)
116+
)
115117
}
116118

117119
gen_pop_and_density_data <-
118-
function(apportion_filename = here::here("aux_data", "flusion_data", "apportionment.csv"),
119-
state_code_filename = here::here("aux_data", "flusion_data", "state_codes_table.csv"),
120-
hhs_code_filename = here::here("aux_data", "flusion_data", "state_code_hhs_table.csv")) {
120+
function(
121+
apportion_filename = here::here("aux_data", "flusion_data", "apportionment.csv"),
122+
state_code_filename = here::here("aux_data", "flusion_data", "state_codes_table.csv"),
123+
hhs_code_filename = here::here("aux_data", "flusion_data", "state_code_hhs_table.csv")
124+
) {
121125
apportionment_data <- readr::read_csv(apportion_filename, show_col_types = FALSE) %>% as_tibble()
122126
imputed_pop_data <- apportionment_data %>%
123127
filter(`Geography Type` %in% c("State", "Nation")) %>%
@@ -217,11 +221,13 @@ daily_to_weekly <- function(epi_df, agg_method = c("sum", "mean"), keys = "geo_v
217221
#' Note that this is 1-indexed, so 1 = Sunday, 2 = Monday, ..., 7 = Saturday.
218222
#' @param week_start the day of the week to use as the start of the week (Sunday is default).
219223
#' Note that this is passed to lubridate unchanged, so it follows lubridate's convention: 1 = Monday, 2 = Tuesday, ..., 7 = Sunday.
220-
daily_to_weekly_archive <- function(epi_arch,
221-
agg_columns,
222-
agg_method = c("sum", "mean"),
223-
week_reference = 4L,
224-
week_start = 7L) {
224+
daily_to_weekly_archive <- function(
225+
epi_arch,
226+
agg_columns,
227+
agg_method = c("sum", "mean"),
228+
week_reference = 4L,
229+
week_start = 7L
230+
) {
225231
# How to aggregate the windowed data.
226232
agg_method <- arg_match(agg_method)
227233
# The columns we will later group by when aggregating.
@@ -246,7 +252,7 @@ daily_to_weekly_archive <- function(epi_arch,
246252
function(x, group_keys, ref_time) {
247253
# Slide over the days and aggregate.
248254
x %>%
249-
mutate(week_start = ceiling_date(time_value, "week", week_start = week_start)-1) %>%
255+
mutate(week_start = ceiling_date(time_value, "week", week_start = week_start) - 1) %>%
250256
summarize(across(all_of(agg_columns), agg_fun), .by = all_of(c(keys, "week_start"))) %>%
251257
mutate(time_value = round_date(week_start, "week", week_reference - 1)) %>%
252258
select(-week_start) %>%
@@ -326,7 +332,10 @@ get_health_data <- function(as_of, disease = c("covid", "flu")) {
326332

327333
metadata_path <- here::here(cache_path, "metadata.csv")
328334
if (!file.exists(metadata_path)) {
329-
meta_data <- readr::read_csv("https://healthdata.gov/resource/qqte-vkut.csv?$query=SELECT%20update_date%2C%20days_since_update%2C%20user%2C%20rows%2C%20row_change%2C%20columns%2C%20column_change%2C%20metadata_published%2C%20metadata_updates%2C%20column_level_metadata%2C%20column_level_metadata_updates%2C%20archive_link%20ORDER%20BY%20update_date%20DESC%20LIMIT%2010000", show_col_types = FALSE)
335+
meta_data <- readr::read_csv(
336+
"https://healthdata.gov/resource/qqte-vkut.csv?$query=SELECT%20update_date%2C%20days_since_update%2C%20user%2C%20rows%2C%20row_change%2C%20columns%2C%20column_change%2C%20metadata_published%2C%20metadata_updates%2C%20column_level_metadata%2C%20column_level_metadata_updates%2C%20archive_link%20ORDER%20BY%20update_date%20DESC%20LIMIT%2010000",
337+
show_col_types = FALSE
338+
)
330339
readr::write_csv(meta_data, metadata_path)
331340
} else {
332341
meta_data <- readr::read_csv(metadata_path, show_col_types = FALSE)
@@ -349,10 +358,11 @@ get_health_data <- function(as_of, disease = c("covid", "flu")) {
349358
data <- readr::read_csv(data_filepath, show_col_types = FALSE)
350359
}
351360
if (disease == "covid") {
352-
data %<>% mutate(
353-
hhs = previous_day_admission_adult_covid_confirmed +
354-
previous_day_admission_pediatric_covid_confirmed
355-
)
361+
data %<>%
362+
mutate(
363+
hhs = previous_day_admission_adult_covid_confirmed +
364+
previous_day_admission_pediatric_covid_confirmed
365+
)
356366
} else if (disease == "flu") {
357367
data %<>% mutate(hhs = previous_day_admission_influenza_confirmed)
358368
}
@@ -403,9 +413,13 @@ calculate_burden_adjustment <- function(flusurv_latest) {
403413
separate(Season, into = c("StartYear", "season"), sep = "-") %>%
404414
select(season, contains("Estimate")) %>%
405415
mutate(season = as.double(season)) %>%
406-
mutate(season = paste0(
407-
as.character(season - 1), "/", substr(season, 3, 4)
408-
))
416+
mutate(
417+
season = paste0(
418+
as.character(season - 1),
419+
"/",
420+
substr(season, 3, 4)
421+
)
422+
)
409423
# get population data
410424
us_population <- readr::read_csv(here::here("aux_data", "flusion_data", "us_pop.csv"), show_col_types = FALSE) %>%
411425
rename(us_pop = POPTOTUSA647NWDB) %>%
@@ -434,16 +448,15 @@ generate_flusurv_adjusted <- function(day_of_week = 1) {
434448
) %>%
435449
select(geo_value = location, time_value = epiweek, hosp_rate = rate_overall, version = issue) %>%
436450
drop_na() %>%
437-
mutate(agg_level = case_when(
438-
geo_value == "network_all" ~ "nation",
439-
TRUE ~ "state"
440-
)) %>%
441451
mutate(
442-
geo_value = if_else(agg_level == "nation",
443-
str_replace_all(geo_value, "network_all", "us"),
444-
tolower(geo_value)
452+
agg_level = case_when(
453+
geo_value == "network_all" ~ "nation",
454+
TRUE ~ "state"
445455
)
446456
) %>%
457+
mutate(
458+
geo_value = if_else(agg_level == "nation", str_replace_all(geo_value, "network_all", "us"), tolower(geo_value))
459+
) %>%
447460
mutate(
448461
geo_value = if_else(
449462
geo_value %in% c("ny_rochester", "ny_albany"),
@@ -494,10 +507,7 @@ generate_flusurv_adjusted <- function(day_of_week = 1) {
494507
mutate(adj_hosp_rate = hosp_rate * adj_factor, source = "flusurv")
495508
flusurv_lat %>%
496509
mutate(
497-
geo_value = if_else(geo_value %in% c("ny_rochester", "ny_albany"),
498-
"ny",
499-
geo_value
500-
)
510+
geo_value = if_else(geo_value %in% c("ny_rochester", "ny_albany"), "ny", geo_value)
501511
) %>%
502512
group_by(geo_value, time_value, version, agg_level) %>%
503513
summarise(
@@ -517,18 +527,21 @@ generate_flusurv_adjusted <- function(day_of_week = 1) {
517527
process_who_nrevss <- function(filename1, filename2, filename3) {
518528
clinical_lab_pos <- readr::read_csv(
519529
here::here("aux_data", "flusion_data", filename1),
520-
skip = 1, show_col_types = FALSE
530+
skip = 1,
531+
show_col_types = FALSE
521532
) %>%
522533
select("REGION TYPE", "REGION", "YEAR", "WEEK", "PERCENT POSITIVE")
523534
combined_pos <- readr::read_csv(
524535
here::here("aux_data", "flusion_data", filename2),
525-
skip = 1, show_col_types = FALSE
536+
skip = 1,
537+
show_col_types = FALSE
526538
) %>%
527539
select("REGION TYPE", "REGION", "YEAR", "WEEK", "PERCENT POSITIVE")
528540
pos_state <- bind_rows(clinical_lab_pos, combined_pos)
529541
ili_state <- readr::read_csv(
530542
here::here("aux_data", "flusion_data", filename3),
531-
skip = 1, show_col_types = FALSE
543+
skip = 1,
544+
show_col_types = FALSE
532545
) %>%
533546
select("REGION TYPE", "REGION", "YEAR", "WEEK", "% WEIGHTED ILI", "%UNWEIGHTED ILI")
534547
merge(pos_state, ili_state, by = c("REGION TYPE", "REGION", "YEAR", "WEEK")) %>%
@@ -565,14 +578,10 @@ gen_ili_data <- function(default_day_of_week = 1) {
565578
mutate(agg_level = str_replace_all(agg_level, "HHS Regions", "hhs_region")) %>%
566579
mutate(agg_level = str_replace_all(agg_level, "National", "nation")) %>%
567580
mutate(agg_level = str_replace_all(agg_level, "States", "state")) %>%
568-
mutate(geo_value = if_else(agg_level == "hhs_region",
569-
str_replace_all(geo_value, "Region (\\d+)", "\\1"),
570-
geo_value
571-
)) %>%
572-
mutate(geo_value = if_else(agg_level == "nation",
573-
str_replace_all(geo_value, "X", "us"),
574-
geo_value
575-
)) %>%
581+
mutate(
582+
geo_value = if_else(agg_level == "hhs_region", str_replace_all(geo_value, "Region (\\d+)", "\\1"), geo_value)
583+
) %>%
584+
mutate(geo_value = if_else(agg_level == "nation", str_replace_all(geo_value, "X", "us"), geo_value)) %>%
576585
rename(epiyear = YEAR, epiweek = WEEK) %>%
577586
left_join(
578587
(.) %>%
@@ -587,6 +596,7 @@ gen_ili_data <- function(default_day_of_week = 1) {
587596
# map names to lower case
588597
name_map <- tibble(abb = state.abb, name = state.name) %>%
589598
bind_rows(
599+
# fmt: skip
590600
tribble(
591601
~name, ~abb,
592602
"District of Columbia", "DC",
@@ -605,8 +615,18 @@ gen_ili_data <- function(default_day_of_week = 1) {
605615
filter(agg_level == "state") %>%
606616
left_join(name_map, by = join_by(geo_value == name)) %>%
607617
select(
608-
geo_value = abb, time_value, version, agg_level, value, season,
609-
season_week, `PERCENT POSITIVE`, `% WEIGHTED ILI`, source, epiyear, epiweek
618+
geo_value = abb,
619+
time_value,
620+
version,
621+
agg_level,
622+
value,
623+
season,
624+
season_week,
625+
`PERCENT POSITIVE`,
626+
`% WEIGHTED ILI`,
627+
source,
628+
epiyear,
629+
epiweek
610630
)
611631

612632
# aggregate NYC and NY state
@@ -642,7 +662,11 @@ gen_ili_data <- function(default_day_of_week = 1) {
642662
#' @param disease_name The name of the disease ("nhsn_covid" or "nhsn_flu")
643663
#' @return An epi_archive of the NHSN data.
644664
get_nhsn_data_archive <- function(disease_name) {
645-
aws.s3::s3read_using(nanoparquet::read_parquet, object = "nhsn_data_archive.parquet", bucket = "forecasting-team-data") %>%
665+
aws.s3::s3read_using(
666+
nanoparquet::read_parquet,
667+
object = "nhsn_data_archive.parquet",
668+
bucket = "forecasting-team-data"
669+
) %>%
646670
filter(disease == disease_name) %>%
647671
filter(!grepl("region.*", geo_value)) %>%
648672
select(-version_timestamp, -disease) %>%

R/default_epipredict_args.R

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,25 @@
44
#' list is the columns used, and then it moves on to normal vectors for the
55
#' rest
66
default_args_list <- function(
7-
lags = c(0L, 7L, 14L),
8-
ahead = 7L,
9-
n_training = Inf,
10-
forecast_date = NULL,
11-
target_date = NULL,
12-
adjust_latency = c("extend_lags", "locf", "none", "extend_ahead"),
13-
warn_latency = TRUE,
14-
quantile_levels = covidhub_probs(),
15-
symmetrize = TRUE,
16-
nonneg = TRUE,
17-
quantile_by_key = character(0L),
18-
check_enough_data_n = NULL,
19-
check_enough_data_epi_keys = NULL,
20-
keys_to_ignore = list(),
21-
seasonal_window = FALSE,
22-
seasonal_backward_window = 5 * 7,
23-
seasonal_forward_window = 3 * 7,
24-
...) {
7+
lags = c(0L, 7L, 14L),
8+
ahead = 7L,
9+
n_training = Inf,
10+
forecast_date = NULL,
11+
target_date = NULL,
12+
adjust_latency = c("extend_lags", "locf", "none", "extend_ahead"),
13+
warn_latency = TRUE,
14+
quantile_levels = covidhub_probs(),
15+
symmetrize = TRUE,
16+
nonneg = TRUE,
17+
quantile_by_key = character(0L),
18+
check_enough_data_n = NULL,
19+
check_enough_data_epi_keys = NULL,
20+
keys_to_ignore = list(),
21+
seasonal_window = FALSE,
22+
seasonal_backward_window = 5 * 7,
23+
seasonal_forward_window = 3 * 7,
24+
...
25+
) {
2526
# error checking if lags is a list
2627
rlang::check_dots_empty()
2728
.lags <- lags
@@ -42,7 +43,8 @@ default_args_list <- function(
4243

4344
if (!is.null(forecast_date) && !is.null(target_date)) {
4445
if (forecast_date + ahead != target_date) {
45-
cli_abort("`forecast_date` {.val {forecast_date}} + `ahead` {.val {ahead}} must equal `target_date` {.val {target_date}}.",
46+
cli_abort(
47+
"`forecast_date` {.val {forecast_date}} + `ahead` {.val {ahead}} must equal `target_date` {.val {target_date}}.",
4648
class = "epipredict__arx_args__inconsistent_target_ahead_forecaste_date"
4749
)
4850
}
@@ -82,9 +84,16 @@ default_args_list <- function(
8284

8385
# Generated from function body. Editing this file has no effect.
8486
default_flatline_args <- function(
85-
ahead = 7L, n_training = Inf, forecast_date = NULL,
86-
target_date = NULL, quantile_levels = covidhub_probs(), symmetrize = TRUE,
87-
nonneg = TRUE, quantile_by_key = character(0L), ...) {
87+
ahead = 7L,
88+
n_training = Inf,
89+
forecast_date = NULL,
90+
target_date = NULL,
91+
quantile_levels = covidhub_probs(),
92+
symmetrize = TRUE,
93+
nonneg = TRUE,
94+
quantile_by_key = character(0L),
95+
...
96+
) {
8897
rlang::check_dots_empty()
8998
epipredict:::arg_is_scalar(ahead, n_training)
9099
epipredict:::arg_is_chr(quantile_by_key, allow_empty = TRUE)
@@ -98,15 +107,22 @@ default_flatline_args <- function(
98107
}
99108
if (!is.null(forecast_date) && !is.null(target_date)) {
100109
if (forecast_date + ahead != target_date) {
101-
cli_warn(c("`forecast_date` + `ahead` must equal `target_date`.",
110+
cli_warn(c(
111+
"`forecast_date` + `ahead` must equal `target_date`.",
102112
i = "{.val {forecast_date}} + {.val {ahead}} != {.val {target_date}}."
103113
))
104114
}
105115
}
106116
structure(
107117
epipredict:::enlist(
108-
ahead, n_training, forecast_date, target_date,
109-
quantile_levels, symmetrize, nonneg, quantile_by_key
118+
ahead,
119+
n_training,
120+
forecast_date,
121+
target_date,
122+
quantile_levels,
123+
symmetrize,
124+
nonneg,
125+
quantile_by_key
110126
),
111127
class = c("flat_fcast", "alist")
112128
)

0 commit comments

Comments
 (0)