@@ -8,10 +8,7 @@ convert_epiweek_to_season <- function(epiyear, epiweek) {
8
8
}
9
9
10
10
epiweeks_in_year <- function (year ) {
11
- last_week_of_year <- seq.Date(as.Date(paste0(year , " -12-24" )),
12
- as.Date(paste0(year , " -12-31" )),
13
- by = 1
14
- )
11
+ last_week_of_year <- seq.Date(as.Date(paste0(year , " -12-24" )), as.Date(paste0(year , " -12-31" )), by = 1 )
15
12
return (max(as.numeric(MMWRweek :: MMWRweek(last_week_of_year )$ MMWRweek )))
16
13
}
17
14
@@ -73,17 +70,20 @@ step_season_week_sine <- function(preproc, season = 35) {
73
70
# ' but for now it's not worth the time
74
71
# ' @param original_dataset tibble or epi_df, should have states as 2 letter lower case
75
72
add_pop_and_density <-
76
- function (original_dataset ,
77
- apportion_filename = here :: here(" aux_data" , " flusion_data" , " apportionment.csv" ),
78
- state_code_filename = here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ),
79
- hhs_code_filename = here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" )) {
73
+ function (
74
+ original_dataset ,
75
+ apportion_filename = here :: here(" aux_data" , " flusion_data" , " apportionment.csv" ),
76
+ state_code_filename = here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ),
77
+ hhs_code_filename = here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" )
78
+ ) {
80
79
pops_by_state_hhs <- gen_pop_and_density_data(apportion_filename , state_code_filename , hhs_code_filename )
81
80
# if the dataset uses "usa" instead of "us", substitute that
82
81
if (" usa" %in% unique(original_dataset )$ geo_value ) {
83
82
pops_by_state_hhs %<> %
84
83
mutate(
85
84
geo_value = ifelse(geo_value == " us" , " usa" , geo_value ),
86
- agg_level = ifelse(grepl(" [0-9]{2}" , geo_value ),
85
+ agg_level = ifelse(
86
+ grepl(" [0-9]{2}" , geo_value ),
87
87
" hhs_region" ,
88
88
ifelse((" us" == geo_value ) | (" usa" == geo_value ), " nation" , " state" )
89
89
)
@@ -107,17 +107,21 @@ add_pop_and_density <-
107
107
108
108
add_agg_level <- function (data ) {
109
109
data %> %
110
- mutate(agg_level = case_when(
111
- grepl(" [0-9]{2}" , geo_value ) ~ " hhs_region" ,
112
- geo_value %in% c(" us" , " usa" ) ~ " nation" ,
113
- .default = " state"
114
- ))
110
+ mutate(
111
+ agg_level = case_when(
112
+ grepl(" [0-9]{2}" , geo_value ) ~ " hhs_region" ,
113
+ geo_value %in% c(" us" , " usa" ) ~ " nation" ,
114
+ .default = " state"
115
+ )
116
+ )
115
117
}
116
118
117
119
gen_pop_and_density_data <-
118
- function (apportion_filename = here :: here(" aux_data" , " flusion_data" , " apportionment.csv" ),
119
- state_code_filename = here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ),
120
- hhs_code_filename = here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" )) {
120
+ function (
121
+ apportion_filename = here :: here(" aux_data" , " flusion_data" , " apportionment.csv" ),
122
+ state_code_filename = here :: here(" aux_data" , " flusion_data" , " state_codes_table.csv" ),
123
+ hhs_code_filename = here :: here(" aux_data" , " flusion_data" , " state_code_hhs_table.csv" )
124
+ ) {
121
125
apportionment_data <- readr :: read_csv(apportion_filename , show_col_types = FALSE ) %> % as_tibble()
122
126
imputed_pop_data <- apportionment_data %> %
123
127
filter(`Geography Type` %in% c(" State" , " Nation" )) %> %
@@ -217,11 +221,13 @@ daily_to_weekly <- function(epi_df, agg_method = c("sum", "mean"), keys = "geo_v
217
221
# ' Note that this is 1-indexed, so 1 = Sunday, 2 = Monday, ..., 7 = Saturday.
218
222
# ' @param week_start the day of the week to use as the start of the week (Sunday is default).
219
223
# ' Note that this is 1-indexed, so 1 = Sunday, 2 = Monday, ..., 7 = Saturday.
220
- daily_to_weekly_archive <- function (epi_arch ,
221
- agg_columns ,
222
- agg_method = c(" sum" , " mean" ),
223
- week_reference = 4L ,
224
- week_start = 7L ) {
224
+ daily_to_weekly_archive <- function (
225
+ epi_arch ,
226
+ agg_columns ,
227
+ agg_method = c(" sum" , " mean" ),
228
+ week_reference = 4L ,
229
+ week_start = 7L
230
+ ) {
225
231
# How to aggregate the windowed data.
226
232
agg_method <- arg_match(agg_method )
227
233
# The columns we will later group by when aggregating.
@@ -246,7 +252,7 @@ daily_to_weekly_archive <- function(epi_arch,
246
252
function (x , group_keys , ref_time ) {
247
253
# Slide over the days and aggregate.
248
254
x %> %
249
- mutate(week_start = ceiling_date(time_value , " week" , week_start = week_start )- 1 ) %> %
255
+ mutate(week_start = ceiling_date(time_value , " week" , week_start = week_start ) - 1 ) %> %
250
256
summarize(across(all_of(agg_columns ), agg_fun ), .by = all_of(c(keys , " week_start" ))) %> %
251
257
mutate(time_value = round_date(week_start , " week" , week_reference - 1 )) %> %
252
258
select(- week_start ) %> %
@@ -326,7 +332,10 @@ get_health_data <- function(as_of, disease = c("covid", "flu")) {
326
332
327
333
metadata_path <- here :: here(cache_path , " metadata.csv" )
328
334
if (! file.exists(metadata_path )) {
329
- meta_data <- readr :: read_csv(" https://healthdata.gov/resource/qqte-vkut.csv?$query=SELECT%20update_date%2C%20days_since_update%2C%20user%2C%20rows%2C%20row_change%2C%20columns%2C%20column_change%2C%20metadata_published%2C%20metadata_updates%2C%20column_level_metadata%2C%20column_level_metadata_updates%2C%20archive_link%20ORDER%20BY%20update_date%20DESC%20LIMIT%2010000" , show_col_types = FALSE )
335
+ meta_data <- readr :: read_csv(
336
+ " https://healthdata.gov/resource/qqte-vkut.csv?$query=SELECT%20update_date%2C%20days_since_update%2C%20user%2C%20rows%2C%20row_change%2C%20columns%2C%20column_change%2C%20metadata_published%2C%20metadata_updates%2C%20column_level_metadata%2C%20column_level_metadata_updates%2C%20archive_link%20ORDER%20BY%20update_date%20DESC%20LIMIT%2010000" ,
337
+ show_col_types = FALSE
338
+ )
330
339
readr :: write_csv(meta_data , metadata_path )
331
340
} else {
332
341
meta_data <- readr :: read_csv(metadata_path , show_col_types = FALSE )
@@ -349,10 +358,11 @@ get_health_data <- function(as_of, disease = c("covid", "flu")) {
349
358
data <- readr :: read_csv(data_filepath , show_col_types = FALSE )
350
359
}
351
360
if (disease == " covid" ) {
352
- data %<> % mutate(
353
- hhs = previous_day_admission_adult_covid_confirmed +
354
- previous_day_admission_pediatric_covid_confirmed
355
- )
361
+ data %<> %
362
+ mutate(
363
+ hhs = previous_day_admission_adult_covid_confirmed +
364
+ previous_day_admission_pediatric_covid_confirmed
365
+ )
356
366
} else if (disease == " flu" ) {
357
367
data %<> % mutate(hhs = previous_day_admission_influenza_confirmed )
358
368
}
@@ -403,9 +413,13 @@ calculate_burden_adjustment <- function(flusurv_latest) {
403
413
separate(Season , into = c(" StartYear" , " season" ), sep = " -" ) %> %
404
414
select(season , contains(" Estimate" )) %> %
405
415
mutate(season = as.double(season )) %> %
406
- mutate(season = paste0(
407
- as.character(season - 1 ), " /" , substr(season , 3 , 4 )
408
- ))
416
+ mutate(
417
+ season = paste0(
418
+ as.character(season - 1 ),
419
+ " /" ,
420
+ substr(season , 3 , 4 )
421
+ )
422
+ )
409
423
# get population data
410
424
us_population <- readr :: read_csv(here :: here(" aux_data" , " flusion_data" , " us_pop.csv" ), show_col_types = FALSE ) %> %
411
425
rename(us_pop = POPTOTUSA647NWDB ) %> %
@@ -434,16 +448,15 @@ generate_flusurv_adjusted <- function(day_of_week = 1) {
434
448
) %> %
435
449
select(geo_value = location , time_value = epiweek , hosp_rate = rate_overall , version = issue ) %> %
436
450
drop_na() %> %
437
- mutate(agg_level = case_when(
438
- geo_value == " network_all" ~ " nation" ,
439
- TRUE ~ " state"
440
- )) %> %
441
451
mutate(
442
- geo_value = if_else( agg_level == " nation " ,
443
- str_replace_all( geo_value , " network_all" , " us " ) ,
444
- tolower( geo_value )
452
+ agg_level = case_when(
453
+ geo_value == " network_all" ~ " nation " ,
454
+ TRUE ~ " state "
445
455
)
446
456
) %> %
457
+ mutate(
458
+ geo_value = if_else(agg_level == " nation" , str_replace_all(geo_value , " network_all" , " us" ), tolower(geo_value ))
459
+ ) %> %
447
460
mutate(
448
461
geo_value = if_else(
449
462
geo_value %in% c(" ny_rochester" , " ny_albany" ),
@@ -494,10 +507,7 @@ generate_flusurv_adjusted <- function(day_of_week = 1) {
494
507
mutate(adj_hosp_rate = hosp_rate * adj_factor , source = " flusurv" )
495
508
flusurv_lat %> %
496
509
mutate(
497
- geo_value = if_else(geo_value %in% c(" ny_rochester" , " ny_albany" ),
498
- " ny" ,
499
- geo_value
500
- )
510
+ geo_value = if_else(geo_value %in% c(" ny_rochester" , " ny_albany" ), " ny" , geo_value )
501
511
) %> %
502
512
group_by(geo_value , time_value , version , agg_level ) %> %
503
513
summarise(
@@ -517,18 +527,21 @@ generate_flusurv_adjusted <- function(day_of_week = 1) {
517
527
process_who_nrevss <- function (filename1 , filename2 , filename3 ) {
518
528
clinical_lab_pos <- readr :: read_csv(
519
529
here :: here(" aux_data" , " flusion_data" , filename1 ),
520
- skip = 1 , show_col_types = FALSE
530
+ skip = 1 ,
531
+ show_col_types = FALSE
521
532
) %> %
522
533
select(" REGION TYPE" , " REGION" , " YEAR" , " WEEK" , " PERCENT POSITIVE" )
523
534
combined_pos <- readr :: read_csv(
524
535
here :: here(" aux_data" , " flusion_data" , filename2 ),
525
- skip = 1 , show_col_types = FALSE
536
+ skip = 1 ,
537
+ show_col_types = FALSE
526
538
) %> %
527
539
select(" REGION TYPE" , " REGION" , " YEAR" , " WEEK" , " PERCENT POSITIVE" )
528
540
pos_state <- bind_rows(clinical_lab_pos , combined_pos )
529
541
ili_state <- readr :: read_csv(
530
542
here :: here(" aux_data" , " flusion_data" , filename3 ),
531
- skip = 1 , show_col_types = FALSE
543
+ skip = 1 ,
544
+ show_col_types = FALSE
532
545
) %> %
533
546
select(" REGION TYPE" , " REGION" , " YEAR" , " WEEK" , " % WEIGHTED ILI" , " %UNWEIGHTED ILI" )
534
547
merge(pos_state , ili_state , by = c(" REGION TYPE" , " REGION" , " YEAR" , " WEEK" )) %> %
@@ -565,14 +578,10 @@ gen_ili_data <- function(default_day_of_week = 1) {
565
578
mutate(agg_level = str_replace_all(agg_level , " HHS Regions" , " hhs_region" )) %> %
566
579
mutate(agg_level = str_replace_all(agg_level , " National" , " nation" )) %> %
567
580
mutate(agg_level = str_replace_all(agg_level , " States" , " state" )) %> %
568
- mutate(geo_value = if_else(agg_level == " hhs_region" ,
569
- str_replace_all(geo_value , " Region (\\ d+)" , " \\ 1" ),
570
- geo_value
571
- )) %> %
572
- mutate(geo_value = if_else(agg_level == " nation" ,
573
- str_replace_all(geo_value , " X" , " us" ),
574
- geo_value
575
- )) %> %
581
+ mutate(
582
+ geo_value = if_else(agg_level == " hhs_region" , str_replace_all(geo_value , " Region (\\ d+)" , " \\ 1" ), geo_value )
583
+ ) %> %
584
+ mutate(geo_value = if_else(agg_level == " nation" , str_replace_all(geo_value , " X" , " us" ), geo_value )) %> %
576
585
rename(epiyear = YEAR , epiweek = WEEK ) %> %
577
586
left_join(
578
587
(. ) %> %
@@ -587,6 +596,7 @@ gen_ili_data <- function(default_day_of_week = 1) {
587
596
# map names to lower case
588
597
name_map <- tibble(abb = state.abb , name = state.name ) %> %
589
598
bind_rows(
599
+ # fmt: skip
590
600
tribble(
591
601
~ name , ~ abb ,
592
602
" District of Columbia" , " DC" ,
@@ -605,8 +615,18 @@ gen_ili_data <- function(default_day_of_week = 1) {
605
615
filter(agg_level == " state" ) %> %
606
616
left_join(name_map , by = join_by(geo_value == name )) %> %
607
617
select(
608
- geo_value = abb , time_value , version , agg_level , value , season ,
609
- season_week , `PERCENT POSITIVE` , `% WEIGHTED ILI` , source , epiyear , epiweek
618
+ geo_value = abb ,
619
+ time_value ,
620
+ version ,
621
+ agg_level ,
622
+ value ,
623
+ season ,
624
+ season_week ,
625
+ `PERCENT POSITIVE` ,
626
+ `% WEIGHTED ILI` ,
627
+ source ,
628
+ epiyear ,
629
+ epiweek
610
630
)
611
631
612
632
# aggregate NYC and NY state
@@ -642,7 +662,11 @@ gen_ili_data <- function(default_day_of_week = 1) {
642
662
# ' @param disease_name The name of the disease ("nhsn_covid" or "nhsn_flu")
643
663
# ' @return An epi_archive of the NHSN data.
644
664
get_nhsn_data_archive <- function (disease_name ) {
645
- aws.s3 :: s3read_using(nanoparquet :: read_parquet , object = " nhsn_data_archive.parquet" , bucket = " forecasting-team-data" ) %> %
665
+ aws.s3 :: s3read_using(
666
+ nanoparquet :: read_parquet ,
667
+ object = " nhsn_data_archive.parquet" ,
668
+ bucket = " forecasting-team-data"
669
+ ) %> %
646
670
filter(disease == disease_name ) %> %
647
671
filter(! grepl(" region.*" , geo_value )) %> %
648
672
select(- version_timestamp , - disease ) %> %
0 commit comments