The source code for this project is at:
https://github.com/justlab/COVID_19_admin_disparities
This project is part of the following publication:
Carrión, D., Colicino, E., Pedretti, N. F., Arfer, K. B., Rush, J., DeFelice, N., & Just, A. C. (2021). Neighborhood-level disparities and subway utilization during the COVID-19 pandemic in New York City. Nature Communications, 12(1). https://doi.org/10.1038/s41467-021-24088-7
t_start = Sys.time()
# CRAN packages:
library(tidyverse)
library(sf)
library(lubridate)
library(tidycensus)
library(ggExtra)
library(ggridges)
library(ggsn)
library(ragg)
library(rstan)
library(drc)
library(spdep)
library(broom)
library(MASS)
library(spatialreg)
library(here)
library(pdftools)
library(matrixStats)
library(egg)
library(ggpubr)
library(scales)
library(qs)
library(corrplot)
library(readxl)
library(splines)
library(magic)
library(httr)
library(jsonlite)
library(DHARMa)
library(kableExtra)
library(lwgeom)
# Github packages available via:
# remotes::install_github("justlab/Just_universal")
# remotes::install_github("justlab/MTA_turnstile")
library(Just.universal)
#### SESSION CONFIGURATION ####
options(dplyr.summarise.inform=FALSE)
# data will default to a subfolder "data/" within the working directory
# unless 1. it is set by the COVID_DATA environment variable:
data.root = Sys.getenv("COVID_DATA")
# or 2. set with an alternative path here:
if (data.root == "") data.root = here("data")
if (!dir.exists(data.root)) dir.create(data.root)
print(paste("data being downloaded into directory", dQuote(data.root)))
## [1] "data being downloaded into directory \"/home/rushj03/dev/COVID_19_admin_disparities/data\""
# Get some data from the git repository rather than downloading from original
# source, to avoid changes in model results due to updated data
use_repo_data = TRUE
if(Sys.getenv("MTA_TURNSTILE_DATA_DIR") == ""){
# set up default download location for MTA turnstile data
mta_dir = file.path(data.root, "mta_turnstile")
if(!dir.exists(mta_dir)) dir.create(mta_dir, recursive = TRUE)
Sys.setenv(MTA_TURNSTILE_DATA_DIR = mta_dir)
}
library(MTA.turnstile)
## MTA.turnstile data directory: /home/rushj03/dev/COVID_19_admin_disparities/data/mta_turnstile
t_turnstile_1 = Sys.time()
# output path for figures
fig.path = here("figures")
if(!dir.exists(fig.path)) dir.create(fig.path)
export.figs = TRUE
vector.figs = FALSE
if(export.figs) message(paste0("Saving ", ifelse(vector.figs, "PDF", "PNG") ," figures to:\n "), fig.path) else message("Not saving figures")
## Saving PNG figures to:
## /home/rushj03/dev/COVID_19_admin_disparities/figures
# source file output directory for publication
source_path = here("figures", "source_files")
if(!dir.exists(source_path)) dir.create(source_path)
# To generate census data, you need an API key, which you can request here: https://api.census.gov/data/key_signup.html
#census_api_key("INSERT YOUR CENSUS API KEY HERE", install = TRUE)
if(Sys.getenv("CENSUS_API_KEY")=="") warning("Census API Key Missing")
# pairmemo function cache
pairmemo.dir = file.path(data.root, "pairmemo")
dir.create(pairmemo.dir, showWarnings = F)
pm = function(...) pairmemo(
directory = pairmemo.dir,
n.frame = 2,
...)
`pairmemo` caches the results of functions on disk based on their input parameters.
If you run this script and then make changes to code, we recommend that you clear the pairmemo cache of any functions that run in the lines following your change before running the script again.
The following commented lines would clear the pairmemo cache for each function. If you do not change the input Census or Pluto data, you would likely only need to run the last one.
#pairmemo.clear(get.Pluto)
#pairmemo.clear(get.qn.blocks)
#pairmemo.clear(acs.main)
#pairmemo.clear(get_essential_acs)
#pairmemo.clear(get.tract.res)
#pairmemo.clear(fit_BWQS_model)
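As a minimal sketch of the caching behavior (using a made-up function, not one from this analysis), wrapping a definition in pm() means that the first call with a given argument evaluates the body and writes the result under the pairmemo directory, while repeat calls with the same argument are read back from disk:
#pm(slow_square <- function(x) {Sys.sleep(2); x^2})
#slow_square(4)              # first call: computes and caches the result
#slow_square(4)              # repeat call: returned from the on-disk cache
#pairmemo.clear(slow_square) # would delete this function's cached results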
#### Functions ####
read_w_filenames <- function(flnm) {
read_csv(flnm) %>%
mutate(filename = flnm)
}
extract_waic <- function (stanfit){
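# Computes WAIC and a truncated importance-sampling approximation to LOO from the
# matrix of pointwise log-likelihood draws ("log_lik": posterior draws x observations)
# stored in the fitted stan object; only the totals are returned.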
log_lik <- rstan::extract(stanfit, "log_lik")$log_lik
dim(log_lik) <- if (length(dim(log_lik)) == 1)
c(length(log_lik), 1)
else c(dim(log_lik)[1], prod(dim(log_lik)[2:length(dim(log_lik))]))
S <- nrow(log_lik)
n <- ncol(log_lik)
lpd <- log(colMeans(exp(log_lik)))
p_waic <- colVars(log_lik)
elpd_waic <- lpd - p_waic
waic <- -2 * elpd_waic
loo_weights_raw <- 1/exp(log_lik - max(log_lik))
loo_weights_normalized <- loo_weights_raw/matrix(colMeans(loo_weights_raw),
nrow = S, ncol = n, byrow = TRUE)
loo_weights_regularized <- pmin(loo_weights_normalized, sqrt(S))
elpd_loo <- log(colMeans(exp(log_lik) * loo_weights_regularized)/colMeans(loo_weights_regularized))
p_loo <- lpd - elpd_loo
pointwise <- cbind(waic, lpd, p_waic, elpd_waic, p_loo, elpd_loo)
total <- colSums(pointwise)
se <- sqrt(n * colVars(pointwise))
return(list(waic = total["waic"], elpd_waic = total["elpd_waic"],
p_waic = total["p_waic"], elpd_loo = total["elpd_loo"],
p_loo = total["p_loo"]))
}
# Used to export most figures; figures that print as a side effect (like corrplot()) are not supported
write_figs <- function(fig, fig_name, fig_width, fig_height, vector_figs = vector.figs){
fig_path = file.path(fig.path, paste0(fig_name, "_", Sys.Date()))
if("gg" %in% class(fig)){
ggsave(fig, filename = paste0(fig_path, ifelse(vector_figs, ".svg", ".png")),
width = fig_width, height = fig_height, dpi = 300)
} else {
if(vector_figs){
svg(paste0(fig_path, ".svg"), pointsize = 9)
} else {
png(filename = paste0(fig_path, ".png"), width = fig_width, height = fig_height)
}
print(fig)
dev.off()
}
if(vector_figs){ # convert SVG to PDF
system(paste0("rsvg-convert -f pdf -o ", fig_path, ".pdf", " ", fig_path, ".svg"))
unlink(paste0(fig_path, ".svg"))
}
message(paste0("Wrote ", fig_path, ifelse(vector_figs, ".pdf", ".png")))
}
# Download a file, update metadata records, and load it with function `f`
# File metadata is stored in a sqlite file, by default in data/downloads/meta.sqlite
download = function(url, to, f, ...){
f(download.update.meta(url, file.path(data.root, "downloads"), to),
...)
}
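# For example, download("https://example.org/file.csv", "file.csv", read_csv) would
# save the file under data/downloads/, record it in the sqlite metadata file, and
# return read_csv() applied to the local path (illustrative URL only).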
#### Load Data ####
# Check if subway turnstile data is already downloaded
lf1 = list.files(file.path(Sys.getenv("MTA_TURNSTILE_DATA_DIR"), "downloads/mta_turnstile/"), pattern = ".txt")
# Check if other data is already downloaded
lf2 = list.files(file.path(data.root, "downloads"), pattern = "ny_xwalk.csv.gz")
# Subway ridership data
Subway_ridership_by_UHF <- relative.subway.usage(2020L, "nhood")
t_turnstile_2 = Sys.time()
# get the Pluto dataset from https://www1.nyc.gov/site/planning/data-maps/open-data/dwn-pluto-mappluto.page
pm(fst = T,
get.Pluto <- function() download(
"https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/nyc_pluto_20v3_csv.zip",
"pluto.zip",
function(p)
read_csv(unz(p, "pluto_20v3.csv"), col_types = cols(spdist2 = col_character(),
overlay2 = col_character(),
zonedist4 = col_character()))[,
c("landuse", "bbl", "numfloors", "unitstotal", "unitsres",
"zipcode")]))
Pluto <- as.data.frame(get.Pluto())
# If you get an error in running this function, the download may be incomplete/corrupt
# In that case, delete the pluto.zip file and try again:
#unlink(file.path(data.root, "downloads", "pluto.zip"))
if (file.exists(file.path(data.root, "Bldg_Footprints.qs"))) {
Bldg_Footprints <- qread(file.path(data.root, "Bldg_Footprints.qs"))
} else {
Bldg_Footprints <- download(
# https://data.cityofnewyork.us/Housing-Development/Building-Footprints/nqwf-w8eh
"https://data.cityofnewyork.us/api/geospatial/nqwf-w8eh?method=export&format=Shapefile",
"building_footprints.zip",
function(p)
st_read(paste0("/vsizip/", p)))
qsave(Bldg_Footprints, file.path(data.root, "Bldg_Footprints.qs"))
}
ZCTA_by_boro <- download(
"https://web.archive.org/web/20180209024431id_/https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm",
"uhf_neighborhoods.html",
function(p)
{# XML::readHTMLTable doesn't identify the columns correctly.
x = str_match_all(read_file(p), regex(dotall = T, paste0(
'<td headers="header1"[^>]+>\\s*(.+?)</td>',
'(.+?)',
'(?=<td headers="header1"|</table>)')))[[1]]
do.call(rbind, lapply(1 : nrow(x), function(i)
data.frame(boro = x[i, 2], zip = as.integer(
str_extract_all(x[i, 3], "\\b\\d{5}\\b")[[1]]))))})
# Download the specific day of test results by ZCTA used in this analysis
ZCTA_test_download <- download(
"https://raw.githubusercontent.com/nychealth/coronavirus-data/6d7c4a94d6472a9ffc061166d099a4e5d89cd3e3/tests-by-zcta.csv",
"2020-05-07_tests-by-zcta.csv",
identity
)
# Read the COVID-19 testing data and record the date parsed from the filename
ZCTA_test_series <- ZCTA_test_download %>%
map_df(~read_w_filenames(.)) %>%
mutate(date = as.Date(str_extract(filename, "[:digit:]{4}-[:digit:]{2}-[:digit:]{2}"))) %>%
dplyr::select(-filename)
## Parsed with column specification:
## cols(
## MODZCTA = col_double(),
## Positive = col_double(),
## Total = col_double(),
## zcta_cum.perc_pos = col_double()
## )
# UHF definitions by zip codes
UHF_ZipCodes <- download(
"http://www.infoshare.org/misc/UHF.pdf",
"uhf_zips.pdf",
function(p)
{x = str_match_all(pdf_text(p)[2],
"(\\d+)\\s+(\\S.+?\\S)\\s*([0-9,]+)")[[1]]
do.call(rbind, lapply(1 : nrow(x), function(i)
data.frame(code = x[i, 2], name = x[i, 3], zip = as.integer(
str_extract_all(x[i, 4], "\\b\\d{5}\\b")[[1]]))))})
# UHF shapefile
UHF_shp <- download(
"https://www1.nyc.gov/assets/doh/downloads/zip/uhf42_dohmh_2009.zip",
"nyc_uhf_nhoods_shapefile.zip",
function(p) read_sf(paste0("/vsizip/", p, "/UHF_42_DOHMH_2009")))
# NYC boroughs from NYC Open Data
NYC_basemap_shp <- download(
"https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=Shapefile",
"Borough_Boundaries.zip",
function(p){
unzip(p, exdir = file.path(data.root, "downloads"))
# open data platform generates a random UUID for every download
ufile = list.files(file.path(data.root, "downloads"), pattern = "geo_export_.*\\.shp", full.names = TRUE)
st_read(ufile, stringsAsFactors = FALSE, quiet = TRUE) %>% st_transform(., crs = 2263)
}
)
# DOHMH MODZCTA Shapefile
MODZCTA_NYC_shp <- download(
"https://data.cityofnewyork.us/api/geospatial/pri4-ifjk?method=export&format=Shapefile",
"Modified Zip Code Tabulation Areas (MODZCTA).zip",
function(p) read_sf(paste0("/vsizip/", p))
)
# Food outlets
if(use_repo_data){
food_retail <- read_csv(here("data", "retail_food_stores_2019-06-13.csv"))
} else {
food_retail <- download(
"https://data.ny.gov/api/views/9a8c-vfzj/rows.csv",
"retail_food_stores.csv",
read_csv)
}
## Parsed with column specification:
## cols(
## County = col_character(),
## `License Number` = col_character(),
## `Operation Type` = col_character(),
## `Establishment Type` = col_character(),
## `Entity Name` = col_character(),
## `DBA Name` = col_character(),
## `Street Number` = col_character(),
## `Street Name` = col_character(),
## `Address Line 2` = col_logical(),
## `Address Line 3` = col_logical(),
## City = col_character(),
## State = col_character(),
## `Zip Code` = col_double(),
## `Square Footage` = col_double(),
## Location = col_character()
## )
# Download deaths by ZCTA as of May 23rd
deaths_by23May2020_by_zcta <- download(
"https://raw.githubusercontent.com/nychealth/coronavirus-data/8d88b2c06cf6b65676d58b28979731faa10c193c/data-by-modzcta.csv",
"2020-05-23_data-by-modzcta.csv",
read_csv
)
## Parsed with column specification:
## cols(
## MODIFIED_ZCTA = col_double(),
## NEIGHBORHOOD_NAME = col_character(),
## BOROUGH_GROUP = col_character(),
## COVID_CASE_COUNT = col_double(),
## COVID_CASE_RATE = col_double(),
## POP_DENOMINATOR = col_double(),
## COVID_DEATH_COUNT = col_double(),
## COVID_DEATH_RATE = col_double(),
## PERCENT_POSITIVE = col_double()
## )
# download MODZCTA to ZCTA crosswalk, current version from repo
modzcta_to_zcta <- download(
"https://raw.githubusercontent.com/nychealth/coronavirus-data/master/Geography-resources/ZCTA-to-MODZCTA.csv",
"ZCTA-to-MODZCTA.csv",
read_csv
)
## Parsed with column specification:
## cols(
## ZCTA = col_double(),
## MODZCTA = col_double()
## )
modzcta_to_zcta1 <- modzcta_to_zcta %>% mutate(ZCTA = as.character(ZCTA))
modzcta_to_zcta2 <- modzcta_to_zcta1 %>% mutate(MODZCTA = as.character(MODZCTA))
MODZCTAs_in_NYC <- as.character(unique(ZCTA_test_series$MODZCTA))
ZCTAs_in_NYC <- as.character(unique(modzcta_to_zcta$ZCTA))
# download ZIP to Tract crosswalk from HUD
zip_to_tract <- download(
"https://www.huduser.gov/portal/datasets/usps/ZIP_TRACT_062020.xlsx",
"ZIP_TRACT_062020.xlsx",
function(p) suppressWarnings(read_excel(path = p, col_types = c("text", "text", "numeric", "skip", "skip", "skip")))
)
# Block to ZCTA and County crosswalk for NY
ny_xwalk <- download("https://lehd.ces.census.gov/data/lodes/LODES7/ny/ny_xwalk.csv.gz",
"ny_xwalk.csv.gz",
function(p) {
zzf = gzfile(p)
read.csv(zzf) %>%
dplyr::select(cty, tabblk2010, zcta, blklondd, blklatdd) %>%
mutate(tabblk2010 = as.character(tabblk2010),
zcta = as.character(zcta),
cntyfips = as.character(cty)) %>%
dplyr::select(-cty)
}
)
# We have many sources of data; these lookup vectors and tables help combine them consistently
NYC_counties1 <- c("Bronx","Kings","Queens","New York","Richmond")
NYC_counties1_full <- c("Bronx County","Kings County","Queens County","New York County","Richmond County")
NYC_boro_county_match <- tibble(County = c("Bronx","Kings","Queens","New York","Richmond"),
boro = c("Bronx","Brooklyn","Queens","Manhattan","Staten Island"),
full_county = c("Bronx County","Kings County","Queens County","New York County","Richmond County"),
fips = c("36005", "36047", "36081", "36061", "36085"))
# stan model script
BWQS_stan_model <- here("code", "nb_bwqs_cov.stan")
#### Census Data ####
# function to pull 2010 block population for Queens & Nassau counties
pm(get.qn.blocks <- function(){
nassau_blk_pop <- get_decennial(geography = "block", variables = "P001001",
state = "NY", county = "Nassau", geometry = FALSE)
queens_blk_pop <- get_decennial(geography = "block", variables = "P001001",
state = "NY", county = "Queens", geometry = FALSE)
bind_rows(nassau_blk_pop, queens_blk_pop) %>%
dplyr::select(GEOID, value) %>% rename("pop2010" = "value")
})
# function to pull 2018 ACS data
pm(acs.main <- function(admin_unit = c("zcta", "tract"), state_unit = c(NULL, "NY"), sf_shapes = c(TRUE, FALSE)) {
ACS_Data <- get_acs(geography = admin_unit,
state = state_unit,
geometry = sf_shapes,
variables = c(medincome = "B19013_001",
total_pop1 = "B01003_001",
fpl_100 = "B06012_002",
fpl_100to150 = "B06012_003",
median_rent = "B25031_001",
total_hholds1 = "B22003_001",
hholds_snap = "B22003_002",
over16total_industry1 = "C24050_001",
ag_industry = "C24050_002",
construct_industry = "C24050_003",
manufact_industry = "C24050_004",
wholesaletrade_industry = "C24050_005",
retail_industry = "C24050_006",
transpo_and_utilities_industry = "C24050_007",
information_industry = "C24050_008",
finance_and_realestate_industry = "C24050_009",
science_mngmt_admin_industry = "C24050_010",
edu_health_socasst_industry = "C24050_011",
arts_entertain_rec_accomodate_industry = "C24050_012",
othsvcs_industry = "C24050_013",
publicadmin_industry = "C24050_014",
total_commute1 = "B08301_001",
drove_commute = "B08301_002",
pubtrans_bus_commute = "B08301_011",
pubtrans_subway_commute = "B08301_013",
pubtrans_railroad_commute = "B08301_013",
pubtrans_ferry_commute = "B08301_015",
taxi_commute = "B08301_016",
bicycle_commute = "B08301_018",
walked_commute = "B08301_019",
workhome_commute = "B08301_021",
unemployed = "B23025_005",
under19_noinsurance = "B27010_017",
age19_34_noinsurance = "B27010_033",
age35_64_noinsurance = "B27010_050",
age65plus_noinsurance = "B27010_066",
hisplat_raceethnic = "B03002_012",
nonhispLat_white_raceethnic = "B03002_003",
nonhispLat_black_raceethnic = "B03002_004",
nonhispLat_amerindian_raceethnic = "B03002_005",
nonhispLat_asian_raceethnic = "B03002_006",
age65_plus = "B08101_008"),
year = 2018,
output = "wide",
survey = "acs5")
if(admin_unit=="zcta"){
ACS_Data <- ACS_Data %>% #only pull out the estimates and cleaning variable names
mutate(GEOID = str_sub(GEOID, -5, -1)) %>%
filter(GEOID %in% ZCTAs_in_NYC) %>%
dplyr::select(-NAME) %>%
dplyr::select(GEOID, !ends_with("M")) %>%
rename_at(vars(ends_with("E")), .funs = list(~str_sub(., end = -2)))
}
if(admin_unit=="tract"){
ACS_Data <- ACS_Data %>% #only pull out the estimates and cleaning variable names
filter(substr(GEOID,1,5) %in% NYC_boro_county_match$fips) %>% # Tracts in NYC counties
dplyr::select(-NAME) %>%
dplyr::select(GEOID, !ends_with("M")) %>%
rename_at(vars(ends_with("E")), .funs = list(~str_sub(., end = -2)))
}
return(ACS_Data)
})
# Use 2010 block population to scale Nassau County ZCTA contribution to NYC MODZCTAs
# This is the method described in the data dictionary at NYC Open Data:
# https://data.cityofnewyork.us/Health/Modified-Zip-Code-Tabulation-Areas-MODZCTA-/pri4-ifjk
nassau_zcta_weights <- function(zcta_acs, mz_to_z, blk_to_z){
blk_pop = get.qn.blocks()
modzcta_span = c("11429", "11411", "11004") # MODZCTA with ZCTAs from both Queens & Nassau
border_zcta <- mz_to_z %>% filter(MODZCTA %in% modzcta_span) %>% pull(ZCTA)
# Filter block-zcta crosswalk table to Queens-Nassau ZCTA of interest
blk_to_z <- blk_to_z %>% filter(zcta %in% border_zcta)
# Join population to block-zcta crosswalk
xwalk_pop <- blk_to_z %>% left_join(blk_pop, by = c("tabblk2010" = "GEOID"))
# Summarise 2010 population by ZCTA to calculate proportions inside NYC
zcta_pop_2010 <- xwalk_pop %>%
group_by(zcta) %>%
summarise(z_pop_2010 = sum(pop2010), .groups = "drop_last")
queens_zcta_pop_2010 <- xwalk_pop %>%
filter(cntyfips == "36081") %>%
group_by(zcta) %>%
summarise(queens_z_pop_2010 = sum(pop2010), .groups = "drop_last")
zcta_pop_props <- queens_zcta_pop_2010 %>%
left_join(zcta_pop_2010, by = "zcta") %>%
mutate(in_NYC_prop = queens_z_pop_2010/z_pop_2010)
zcta_weights <- zcta_acs %>% dplyr::select(GEOID) %>%
left_join(zcta_pop_props, by = c("GEOID" = "zcta")) %>%
dplyr::select(GEOID, in_NYC_prop) %>%
mutate(in_NYC_prop =
case_when(is.na(in_NYC_prop) ~ 1,
TRUE ~ in_NYC_prop))
# Apply weights for all Census variables except median vars
zcta_acs <- zcta_acs %>% left_join(zcta_weights, by = "GEOID")
varvec <- 1:ncol(zcta_acs)
varvec <- varvec[-grep("GEOID|in_NYC_prop|medincome|median_rent", names(zcta_acs))]
zcta_acs <- zcta_acs %>% mutate_at(vars(all_of(varvec)), ~ . * in_NYC_prop)
}
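# Illustration with made-up numbers (not real Census values): a border ZCTA with
# 8,000 residents in Queens blocks and 2,000 in Nassau blocks in 2010 gets
# in_NYC_prop = 8000 / (8000 + 2000) = 0.8, and each of its count variables
# (median income and median rent are excluded) is multiplied by 0.8 before
# aggregation to the MODZCTA.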
# function to clean ACS data
clean_acs_data_and_derive_vars <- function(df, admin_unit = c("zcta", "tract")){
if(admin_unit=="zcta"){
ACS_Data1a <- df %>%
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(medincome, median_rent), ~weighted.mean(., total_pop1, na.rm = T)) %>%
rename(zcta = "MODZCTA")
ACS_Data1b <- df %>%
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(total_pop1:fpl_100to150, total_hholds1:age65_plus), ~sum(.)) %>%
mutate_at(vars(ends_with("_commute")), ~round((./total_commute1)*100, 2)) %>% #proportion of people relying on a given mode of transit
mutate_at(vars(ends_with("_raceethnic")), ~round((./total_pop1)*100, 2)) %>% #proportion of people reporting a given race/ethnicity
mutate(not_insured = round(((under19_noinsurance + age19_34_noinsurance + age35_64_noinsurance + age65plus_noinsurance) / total_pop1)*100, 2), #proportion uninsured
#snap_hholds = round((hholds_snap/total_hholds1)*100, 2), #proportion relying on SNAP
#fpl_150 = round(((fpl_100+fpl_100to150)/total_pop1)*100, 2), #proportion 150% or less of FPL
unemployed = round((unemployed/over16total_industry1)*100, 2), #proportion unemployed
not_quarantined_jobs = round(((ag_industry+(construct_industry*.25)+wholesaletrade_industry+ #an estimate of who is still leaving the house for work based on industry
(edu_health_socasst_industry*.5)+transpo_and_utilities_industry)/over16total_industry1)*100, 2)) %>%
dplyr::select(-ends_with("_noinsurance"), -fpl_100, -fpl_100to150, -ends_with("_industry"), -hholds_snap) %>%
rename(zcta = "MODZCTA")
ACS_Data2 <- left_join(ACS_Data1a, ACS_Data1b, by = "zcta") %>%
mutate(zcta = as.character(zcta))
} else{
ACS_Data2 <- df %>%
mutate_at(vars(ends_with("_commute")), ~round((./total_commute1)*100, 2)) %>% #proportion of people relying on a given mode of transit
mutate_at(vars(ends_with("_raceethnic")), ~round((./total_pop1)*100, 2)) %>% #proportion of people reporting a given race/ethnicity
mutate(not_insured = round(((under19_noinsurance + age19_34_noinsurance + age35_64_noinsurance + age65plus_noinsurance) / total_pop1)*100, 2), #proportion uninsured
unemployed = round((unemployed/over16total_industry1)*100, 2), #proportion unemployed
not_quarantined_jobs = round(((ag_industry+(construct_industry*.25)+wholesaletrade_industry+ #an estimate of who is still leaving the house for work based on industry
(edu_health_socasst_industry*.5)+transpo_and_utilities_industry)/over16total_industry1)*100, 2)) %>%
dplyr::select(-ends_with("_noinsurance"), -ends_with("_industry"),-fpl_100, -fpl_100to150,-hholds_snap)
}
return(ACS_Data2)
}
# Functions to pull mode of transportation for our approximation of essential workers
pm(fst = T,
get_essential_acs <- function(admin_unit, state_unit) {
get_acs(geography = admin_unit, #pull down the relevant categories
state = state_unit,
variables = c(ag_car1_commute = "B08126_017",
ag_pubtrans_commute = "B08126_047",
construct_car1_commute ="B08126_018",
construct_pubtrans_commute = "B08126_048",
wholesale_car1_commute = "B08126_020",
wholesale_pubtrans_commute = "B08126_050",
transpo_car1_commute = "B08126_022",
transpo_pubtrans_commute = "B08126_052",
ed_hlthcare_car1_commute = "B08126_026",
ed_hlthcare_pubtrans_commute = "B08126_056"),
year = 2018,
output = "wide",
survey = "acs5")
})
acs.essential <- function(admin_unit, zcta_pop = NULL, state_unit = NULL) {
if(!admin_unit %in% c("zcta", "tract")) stop("admin_unit must be either 'zcta' or 'tract'")
if(admin_unit == "tract" & is.null(state_unit)) stop("state_unit must be set to download tracts")
if(!is.null(state_unit)) if(state_unit != "NY") stop("state_unit must be either NULL or 'NY'")
ACS_EssentialWrkr_Commute <- get_essential_acs(admin_unit = admin_unit, state_unit = state_unit)
if(admin_unit == "zcta"){
if(is.null(zcta_pop)) stop("zcta_pop must be set for scaling MODZCTA on Queens/Nassau boundary")
ACS_Essential_worker_estimates <- ACS_EssentialWrkr_Commute %>% #clean data and aggregate
dplyr::select(-ends_with("M"), -NAME) %>%
mutate(GEOID = str_sub(GEOID, -5, -1)) %>%
filter(GEOID %in% ZCTAs_in_NYC) %>%
# scale ZCTAs by proportion of population in NYC
right_join(zcta_pop %>% dplyr::select("GEOID", "in_NYC_prop"), by = "GEOID") %>%
mutate_at(vars(2:11), ~ . * in_NYC_prop) %>%
# summarize ZCTA to MODZCTA
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(2:11),
~ sum(., na.rm = T)) %>%
rename(zcta = "MODZCTA") %>%
mutate_at(vars(starts_with("ed_hlthcare")), ~ round(. / 2), 0) %>% #maintain same proportions as estimated nonquarintined jobs
mutate_at(vars(starts_with("construct")), ~ round(. / 4), 0) %>%
mutate(
essentialworker_drove = rowSums(dplyr::select(., contains("car1_commute"))),
essentialworker_pubtrans = rowSums(dplyr::select(., contains("pubtrans")))) %>%
dplyr::select(zcta, essentialworker_drove, essentialworker_pubtrans) %>%
mutate(zcta = as.character(zcta))
}
else { # tracts
ACS_Essential_worker_estimates <- ACS_EssentialWrkr_Commute %>% #clean data and aggregate
dplyr::select(-ends_with("M"), -NAME) %>%
filter(substr(GEOID,1,5) %in% NYC_boro_county_match$fips) %>% # Tracts in NYC counties
mutate_at(vars(starts_with("ed_hlthcare")), ~round(./2, 0)) %>% #maintain same proportions as estimated non-quarantined jobs
mutate_at(vars(starts_with("construct")), ~round(./4, 0)) %>%
mutate(essentialworker_drove = rowSums(dplyr::select(., contains("car1_commute"))),
essentialworker_pubtrans = rowSums(dplyr::select(., contains("pubtrans")))) %>%
dplyr::select(GEOID, essentialworker_drove, essentialworker_pubtrans)
}
return(ACS_Essential_worker_estimates)
}
# ZCTA CENSUS DATA
options(tigris_use_cache = TRUE)
ACS_Data1 <- as.data.frame(acs.main("zcta", NULL, FALSE)) #download the zcta data
ACS_Data_scaled <- nassau_zcta_weights(ACS_Data1, modzcta_to_zcta2, ny_xwalk)
ACS_Data2 <- clean_acs_data_and_derive_vars(ACS_Data_scaled, "zcta")
ACS_EssentialWrkr_Commute1 = as.data.frame(acs.essential("zcta", zcta_pop = ACS_Data_scaled, state_unit = NULL))
print(paste("The 2018 5-year ACS population range in NYC MODZCTAs is:", paste(range(ACS_Data2$total_pop1), collapse = "-")))
## [1] "The 2018 5-year ACS population range in NYC MODZCTAs is: 3028-112425"
# TRACT CENSUS DATA
acs_tracts <- acs.main("tract", "NY", TRUE)
acs_tracts2 <- clean_acs_data_and_derive_vars(acs_tracts, "tract")
acs_tracts_commute1 = as.data.frame(acs.essential("tract", state_unit = "NY"))
t_census = Sys.time()
#### Grocery stores per area ####
non_supermarket_strings <- c("DELI|TOBACCO|GAS|CANDY|7 ELEVEN|7-ELEVEN|LIQUOR|ALCOHOL|BAKERY|CHOCOLATE|DUANE READE|WALGREENS|CVS|RITE AID|RAVIOLI|WINERY|WINE|BEER|CAFE|COFFEE")
food_retail_filtered <- food_retail %>%
filter(County %in% NYC_boro_county_match$County) %>%
filter(str_detect(`Establishment Type`, "J") & str_detect(`Establishment Type`, "A") & str_detect(`Establishment Type`, "C") &
!str_detect(`Establishment Type`, "H")) %>%
filter(!str_detect(`Entity Name`, non_supermarket_strings) & !str_detect(`DBA Name`, non_supermarket_strings)) %>%
filter(`Square Footage`>=4500) %>%
mutate(zcta = as.character(str_extract(Location, "[:digit:]{5}"))) %>%
mutate(Address = case_when(
# some locations geocode better when address includes city name
`License Number` %in% c("638599", "712410", "706967", "710078") ~
paste(paste(`Street Number`, `Street Name`), City, State, zcta, sep = ","),
# but most geocode better without it, see limitation #4 at: http://gis.ny.gov/gisdata/inventories/details.cfm?DSID=1278
TRUE ~
paste(paste(`Street Number`, `Street Name`), State, zcta, sep = ",") )
)
# Geocode grocers, using a cached version if available to make analysis reproducible
# The geocoding service may be updated in the future and give different results
cached_grocers = here("data", "grocers_geocode_2020-11-09.csv")
if(file.exists(cached_grocers) & use_repo_data){
gctable <- read.csv(cached_grocers)
failed = which(gctable$score == 0)
message("Loaded cached geocoded grocers: ", nrow(gctable)-length(failed), "/", nrow(gctable), " have coordinates.") # 997/1037
if(nrow(gctable) != nrow(food_retail_filtered)) warning("Cached geocoded table has different row count than non-geocoded table")
} else {
# locations are returned in crs=26918, UTM 18N NAD83
api = "https://gisservices.its.ny.gov/arcgis/rest/services/Locators/Street_and_Address_Composite/GeocodeServer/findAddressCandidates?f=json&maxLocations=1&SingleLine="
message("Geocoding ", nrow(food_retail_filtered), " grocers with NY State ITS geocoder...")
t1 = Sys.time()
res = lapply(food_retail_filtered$Address, function(addr) {
GET(url = paste0(api, URLencode(addr))) })
tdiff = Sys.time() - t1
# extract results
geocodes = lapply(res, function(page) fromJSON(rawToChar(page$content), flatten = TRUE)$candidates)
failed = which(sapply(geocodes,class) != "data.frame")
geocodes[failed] <- lapply(1:length(failed), function(void){
data.frame(address = NA_character_, score = 0, location.x = NA_real_, location.y = NA_real_)})
gctable = bind_rows(geocodes)
message("Geocoded ", nrow(food_retail_filtered)-length(failed), "/", nrow(food_retail_filtered),
" grocers in ", round(as.numeric(tdiff),1), " ", attributes(tdiff)$units)
write.csv(gctable, paste0(data.root, "/grocers_geocode_", Sys.Date(), ".csv"))
}
## Loaded cached geocoded grocers: 997/1037 have coordinates.
# Count grocers by tract
gctable = filter(gctable, score > 0)
grocerSF = st_as_sf(gctable, coords = c("location.x", "location.y"), crs = 26918) %>% st_transform(crs = 2263)
tractSF = acs_tracts2[, "GEOID"] %>% st_transform(., crs = 2263)
tract_grocers = suppressWarnings(st_intersection(tractSF, grocerSF)) %>%
st_set_geometry(., NULL) %>%
group_by(GEOID) %>%
summarise(grocers = n_distinct(`address`))
#nrow(tract_grocers) # 749
#sum(tract_grocers$grocers) # 993
# Count grocers by ZCTA
zctaSF <- MODZCTA_NYC_shp %>% dplyr::select(modzcta, geometry) %>% st_transform(crs = 2263)
zcta_grocers <- suppressWarnings(st_intersection(zctaSF, grocerSF)) %>%
st_set_geometry(., NULL) %>%
group_by(modzcta) %>%
summarise(grocers = n_distinct(`address`))
# sum(zcta_grocers$grocers) # 993
# nrow(zcta_grocers) # 172
# range(zcta_grocers$grocers) # 1, 21
#### Subway station locations ####
SubwayStation_shp <- as_tibble(turnstile()$stations) %>%
st_as_sf(., coords = c("lon", "lat"), crs = 4269) %>%
st_transform(., crs = 2263) %>%
filter(!str_detect(ca, "PTH")) #removing New Jersey PATH stations
#### Residential area ####
Pluto_ResOnly <- Pluto %>%
filter(landuse>="01" & landuse<="04") %>%
mutate(base_bbl = as.character(bbl)) %>%
dplyr::select(-bbl)
ResBBLs <- as.character(Pluto_ResOnly$base_bbl)
# zcta-level residential area
Res_Bldg_Footprints <- Bldg_Footprints %>%
st_set_geometry(., NULL) %>%
mutate(base_bbl = as.character(base_bbl)) %>%
filter(base_bbl %in% ResBBLs &
feat_code == "2100") %>%
mutate(bldg_volume = shape_area * heightroof) %>%
left_join(., Pluto_ResOnly, by = "base_bbl") %>%
mutate(bldg_volume = if_else(is.na(bldg_volume), shape_area*numfloors*10, bldg_volume),
res_volume = (bldg_volume/unitstotal)*unitsres,
zcta = as.character(zipcode)) %>%
group_by(zcta) %>%
summarise(total_res_volume_zcta = sum(res_volume, na.rm = TRUE))
# tract-level residential area
Res_Bldg_Footprints2 <- Bldg_Footprints %>%
st_transform(crs = 2263) %>%
suppressWarnings(st_centroid(., of_largest_polygon = TRUE)) %>%
mutate(base_bbl = as.character(base_bbl)) %>%
filter(base_bbl %in% ResBBLs &
feat_code == "2100") %>%
mutate(bldg_volume = shape_area * heightroof) %>%
left_join(., Pluto_ResOnly, by = "base_bbl") %>%
mutate(bldg_volume = if_else(is.na(bldg_volume), shape_area*numfloors*10, bldg_volume),
res_volume = (bldg_volume/unitstotal)*unitsres)
get.tract.res <- function(res, tracts) suppressWarnings(st_intersection(res, tracts))
res_bldg_tract <- get.tract.res(Res_Bldg_Footprints2, tractSF) # 72 seconds
res_bldg_tract_sum <- st_set_geometry(res_bldg_tract, NULL) %>%
group_by(GEOID) %>%
summarise(total_res_volume_tract = sum(res_volume, na.rm = TRUE))
#nrow(res_bldg_tract_sum) # 2132
#### COVID Tests ####
MODZCTA_NYC_shp1 <- MODZCTA_NYC_shp %>%
dplyr::select(modzcta, geometry) %>%
rename("zcta" = "modzcta")
May7_tests <- ZCTA_test_series %>%
filter(date=="2020-05-07") %>%
mutate(zcta = as.character(MODZCTA)) %>%
rename(total_tests = "Total") %>%
dplyr::select(zcta, date, Positive, total_tests)
ZCTA_by_boro1 <- ZCTA_by_boro %>%
mutate(boro = as.character(boro),
zcta = as.character(zip)) %>%
dplyr::select(-zip) %>%
bind_rows(.,
tibble(boro = as.character(c("Manhattan", "Manhattan", "Queens")), # correcting NAs
zcta = as.character(c("10069", "10282", "11109"))))
# get water mask for maps
source(here("code/water_mask.R"))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# Figure 3B - Map of Tests by MODZCTA
theme_smallmaps <- theme(legend.title = element_text(face = "bold", size = 9),
plot.title = element_text(size = 9.5),
panel.background = element_rect(fill = "#cccccc"),
panel.grid = element_blank(),
legend.background = element_rect(fill = "transparent"),
legend.position = c(0.22, 0.80),
legend.text = element_text(size = 8.5),
plot.margin = unit(c(4,0,4,0), "pt"),
legend.key.size = unit(1.1, "lines"),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
axis.title = element_blank())
fig3b <- MODZCTA_NYC_shp1 %>%
left_join(., May7_tests, by = "zcta") %>%
left_join(., ACS_Data2, by = "zcta") %>%
filter(zcta != "99999") %>%
mutate(pos_per_100000 = (Positive/total_pop1)*100000) %>%
ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
geom_sf(aes(fill = pos_per_100000), lwd = 0.2)+
scalebar(MODZCTA_NYC_shp1, dist = 5, dist_unit = "km",
transform = TRUE, model = "WGS84",
st.size = 2.8, height = 0.015, border.size = 0.5,
anchor = c(x = -73.71, y = 40.51)) +
labs(fill = "Positives per 100,000") +
ggtitle("Cumulative positive COVID tests by zip code (May 7, 2020)") +
scale_fill_gradientn(colours=brewer_pal("YlGnBu", type = "seq")(7)) +
coord_sf(crs = st_crs(MODZCTA_NYC_shp1),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
theme_bw(base_size = 6) +
theme_smallmaps
write_csv(st_drop_geometry(MODZCTA_NYC_shp1 %>%
left_join(., May7_tests, by = "zcta") %>%
left_join(., ACS_Data2, by = "zcta") %>%
filter(zcta != "99999") %>%
mutate(pos_per_100000 = (Positive/total_pop1)*100000)) %>% dplyr::select(zcta, pos_per_100000), file.path(source_path, "fig3b.csv"))
#### Create data frames of all above information ####
ZCTA_ACS_COVID_shp <- MODZCTA_NYC_shp1 %>%
st_transform(., crs = 2263) %>%
dplyr::select(zcta, geometry) %>%
left_join(., ACS_Data2, by = "zcta") %>%
left_join(., May7_tests, by = "zcta") %>%
left_join(., Res_Bldg_Footprints, by = "zcta") %>%
left_join(., ACS_EssentialWrkr_Commute1, by = "zcta") %>%
left_join(., zcta_grocers, by = c("zcta" = "modzcta")) %>%
mutate(pop_density = as.numeric(total_pop1/st_area(geometry)),
avg_hhold_size = round((total_pop1/total_hholds1), 2),
pos_per_100000 = (Positive/total_pop1)*100000,
testing_ratio = (total_tests/total_pop1),
res_vol_zctadensity = as.numeric(total_res_volume_zcta/st_area(geometry)),
res_vol_popdensity = as.numeric(total_pop1/total_res_volume_zcta),
pubtrans_ferrysubway_commute = pubtrans_subway_commute + pubtrans_ferry_commute,
grocers = replace_na(grocers, 0),
grocers_per_1000 = (grocers/total_pop1)*1000,
pos_per_100000 = round(pos_per_100000, 0),
valid_var = "0",
one_over_grocers_per_1000 = if_else(is.infinite(1/grocers_per_1000), (1/.0293)+1, 1/grocers_per_1000), #max + 1 for zero values
didnot_workhome_commute = 100 - workhome_commute,
one_over_medincome = 1/medincome) %>%
dplyr::select(-pubtrans_subway_commute, -pubtrans_ferry_commute) %>%
left_join(., ZCTA_by_boro1, by = "zcta") %>%
mutate_at(vars(starts_with("essentialworker_")), ~round((./over16total_industry1)*100, 2)) %>%
filter(zcta != "99999") #remove na
ZCTA_ACS_COVID <- ZCTA_ACS_COVID_shp %>%
st_set_geometry(., NULL) #remove geometry
tract_vars <- tractSF %>% # uses local CRS
left_join(., st_set_geometry(acs_tracts2, NULL), by = "GEOID") %>%
left_join(., acs_tracts_commute1, by = "GEOID") %>%
left_join(., res_bldg_tract_sum, by = "GEOID") %>%
left_join(., tract_grocers, by = "GEOID") %>%
mutate(pop_density = as.numeric(total_pop1/st_area(geometry)),
avg_hhold_size = round((total_pop1/total_hholds1), 2),
res_vol_tractdensity = as.numeric(total_res_volume_tract/st_area(geometry)),
res_vol_popdensity = as.numeric(total_pop1/total_res_volume_tract),
pubtrans_ferrysubway_commute = pubtrans_subway_commute + pubtrans_ferry_commute,
grocers = replace_na(grocers, 0),
grocers_per_1000 = (grocers/total_pop1)*1000,
valid_var = "0",
didnot_workhome_commute = 100 - workhome_commute,
one_over_grocers_per_1000 = if_else(is.infinite(1/grocers_per_1000), (1/.0293)+1, 1/grocers_per_1000),
one_over_medincome = 1/medincome) %>%
dplyr::select(-pubtrans_subway_commute, -pubtrans_ferry_commute) %>%
mutate_at(vars(starts_with("essentialworker_")), ~round((./over16total_industry1)*100, 2))
#### Tract to ZCTA weighted assignment ####
# Shares of residential addresses in ZCTAs by Tract are later used to select
# representative tract-level SES values at a specified location on the ECDF
modzcta_to_zcta_chr <- data.frame(ZCTA = as.character(modzcta_to_zcta$ZCTA),
MODZCTA = as.character(modzcta_to_zcta$MODZCTA))
# Calculate the proportion of population each combined ZCTA contributes to MODZCTA
modzcta_to_zcta_pop <- modzcta_to_zcta_chr %>%
left_join(ACS_Data_scaled[, c("GEOID", "total_pop1")], by = c("ZCTA" = "GEOID")) %>%
group_by(MODZCTA) %>%
mutate(sum_pop = sum(total_pop1)) %>%
ungroup() %>%
mutate(pop_prop = total_pop1/sum_pop) %>%
arrange(MODZCTA)
# Sum ZCTA->Tract RES_RATIO weights when multiple ZCTA combine to a single MODZCTA
modzcta_to_tract <- modzcta_to_zcta_pop %>%
filter(MODZCTA != "99999") %>%
# Weights of each ZCTA are scaled by their proportion of MODZCTA population
left_join(zip_to_tract, by = c("ZCTA" = "ZIP")) %>%
mutate(scaled_res_ratio = RES_RATIO * pop_prop) %>%
# note: MODZCTA 10001 & 10007 contain a ZCTA with a small share of population
# but a zero share of RES_RATIO. This makes their sum of scaled_res_ratio by
# MODZCTA less than 1, but this will not affect the weighted median or 3Q values
# by tract.
group_by(MODZCTA, TRACT) %>%
dplyr::summarize(SUM_RES_RATIO = sum(scaled_res_ratio), .groups = "drop") %>%
# Weights are then multiplied by the population density of each tract
left_join(tract_vars, by = c("TRACT" = "GEOID")) %>%
dplyr::select(MODZCTA, TRACT, SUM_RES_RATIO, total_pop1, total_hholds1) %>%
filter(total_hholds1 > 0) %>%
mutate(tract_popdens = total_pop1/total_hholds1) %>%
mutate(W_RES_RATIO = SUM_RES_RATIO * tract_popdens) %>%
dplyr::select(MODZCTA, TRACT, W_RES_RATIO)
modzcta_to_tract
## # A tibble: 3,020 x 3
## MODZCTA TRACT W_RES_RATIO
## <chr> <chr> <dbl>
## 1 10001 36061005600 0.0307
## 2 10001 36061005800 0.0648
## 3 10001 36061007100 0
## 4 10001 36061007600 0.250
## 5 10001 36061008400 0.00422
## 6 10001 36061009100 0.154
## 7 10001 36061009300 0.145
## 8 10001 36061009500 0.231
## 9 10001 36061009700 0.250
## 10 10001 36061009900 0.253
## # … with 3,010 more rows
# length(unique(modzcta_to_tract$MODZCTA)) # 177
# length(unique(modzcta_to_tract$TRACT)) # 2111
# length(unique(tractSF$GEOID)) # 2167
# check res_ratio against tracts with no population
tract_modzcta_pop <- modzcta_to_tract %>%
left_join(acs_tracts2, by = c("TRACT" = "GEOID")) %>%
dplyr::select(MODZCTA, TRACT, total_pop1, W_RES_RATIO)
# checking for tracts with no population but res_ratio > 0
# (not possible now that the weighted ratio uses population)
#tract_modzcta_pop %>% filter(total_pop1 == 0 & W_RES_RATIO > 0) %>% nrow() # 0
# check to make sure no MODZCTA have all zeroes for all tract res_ratios
all_zero_ratio <- tract_modzcta_pop %>% group_by(MODZCTA) %>% filter(all(W_RES_RATIO == 0))
if(nrow(all_zero_ratio) > 0){
warning("The following MODZCTA have all zeroes for weighted tract residents:\n ",
paste(all_zero_ratio$MODZCTA, collapse = ","))
}
modzcta_to_tract2 <- dplyr::select(tract_modzcta_pop, -total_pop1)
### Preparation done --- Now for the Analysis ###
#### Part 1: Creation of BWQS Neighborhood Infection Risk Scores ####
# Step 1: Create univariate scatterplots with negative binomial linear smoothers
# to make sure the direction of association is consistent for all variables
# Visualize the non-linear relationship between infection rate and test_ratio (not adjusted for social inequity)
# infections versus testing ratio with smoothers; this covariate is not quantiled so the shape of association is important
ggplot(ZCTA_ACS_COVID, aes(x = testing_ratio, y = pos_per_100000)) + geom_point() +
ylab("infections per 100,000") +
geom_smooth(color = "red", formula = y ~ x, method = "glm.nb") +
stat_smooth(color = "green", method = "gam", formula = y ~ s(x), method.args = list(family = "nb"), se = F) +
geom_smooth(method = "glm.nb", formula = y ~ splines::ns(x, 3), se = FALSE) + theme_bw()
# 10 social variables
plotsocial <- ggplot(ZCTA_ACS_COVID, aes(y = pos_per_100000)) +
geom_point() + geom_smooth(color = "red", formula = y ~ x, method = "glm.nb") + ylab("infections per 100,000") + theme_bw()
plotsocial %+% aes(x = one_over_medincome)
plotsocial %+% aes(x = not_insured)
plotsocial %+% aes(x = one_over_grocers_per_1000)
plotsocial %+% aes(x = unemployed)
plotsocial %+% aes(x = res_vol_popdensity)
plotsocial %+% aes(x = didnot_workhome_commute)
plotsocial %+% aes(x = not_quarantined_jobs)
plotsocial %+% aes(x = avg_hhold_size)
plotsocial %+% aes(x = essentialworker_drove)
plotsocial %+% aes(x = essentialworker_pubtrans)
SES_vars <- names(ZCTA_ACS_COVID %>% dplyr::select(one_over_medincome, not_insured, one_over_grocers_per_1000, unemployed,
not_quarantined_jobs, essentialworker_pubtrans, essentialworker_drove,
didnot_workhome_commute, res_vol_popdensity, avg_hhold_size))
# Step 2a: Examine relationships between explanatory variables to make sure no pairwise correlation exceeds 0.9, as this could bias the BWQS
Cors_SESVars <- cor(x = ZCTA_ACS_COVID %>% dplyr::select(all_of(SES_vars)), method = "kendall")
Cors_SESVars1 <- as.data.frame(Cors_SESVars)
Cors_SESVars1$var1 <- row.names(Cors_SESVars1)
Cors_SESVars2 <- gather(data = Cors_SESVars1, key = "var2", value = "correlation", -var1) %>%
filter(var1 != var2)
Cors_SESVars2 %>% arrange(desc(correlation)) %>% distinct(correlation, .keep_all = T)
## var1 var2 correlation
## 1 essentialworker_drove not_quarantined_jobs 0.6121033
## 2 essentialworker_pubtrans one_over_medincome 0.5432472
## 3 unemployed one_over_medincome 0.5341512
## 4 not_insured one_over_medincome 0.5270258
## 5 essentialworker_pubtrans unemployed 0.5243542
## 6 didnot_workhome_commute essentialworker_drove 0.5086620
## 7 avg_hhold_size not_quarantined_jobs 0.4787113
## 8 didnot_workhome_commute not_quarantined_jobs 0.4763680
## 9 avg_hhold_size didnot_workhome_commute 0.4598744
## 10 res_vol_popdensity one_over_medincome 0.4581407
## 11 res_vol_popdensity essentialworker_pubtrans 0.4544725
## 12 res_vol_popdensity not_insured 0.4382568
## 13 avg_hhold_size essentialworker_drove 0.4198843
## [ reached 'max' / getOption("max.print") -- omitted 32 rows ]
if(export.figs) {
fig_path = file.path(fig.path, paste0("sfig1_", Sys.Date()))
if(vector.figs){
svg(paste0(fig_path, ".svg"), pointsize = 9, width = 7, height = 6)
} else {
png(filename = paste0(fig_path, ".png"), width = 300*7, height = 300*6, pointsize = 30)
}
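# Passing the correlation matrix itself as p.mat with sig.level = -1 and
# insig = "p-value" makes corrplot() treat every cell as "insignificant" and print
# its p.mat value -- here the Kendall correlation -- in the lower triangle.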
corrplot(Cors_SESVars, p.mat = Cors_SESVars, insig = "p-value", type = "lower",
sig.level = -1, tl.col = "black", tl.srt = 45)
dev.off()
if(vector.figs){
system(paste0("rsvg-convert -f pdf -o ", fig_path, ".pdf", " ", fig_path, ".svg"))
unlink(paste0(fig_path, ".svg"))
}
}
# Step 2b: Examine univariable Kendall associations of all selected variables with the outcome
bind_cols(Variables = SES_vars,
ZCTA_ACS_COVID %>%
dplyr::select(all_of(SES_vars), pos_per_100000) %>%
summarise_at(vars(all_of(SES_vars)), list(~cor(., pos_per_100000, method = "kendall"))) %>%
t() %>%
as_tibble(),
ZCTA_ACS_COVID %>%
dplyr::select(all_of(SES_vars), pos_per_100000) %>%
summarise_at(vars(all_of(SES_vars)),
list(~cor.test(., pos_per_100000, method = "kendall")$p.value)) %>%
t() %>%
as_tibble()) %>%
mutate(`Correlation (Tau)` = round(V1...2, 3),
`p value` = as.character(ifelse(V1...3 < 0.0001, "< 0.0001", round(V1...3, 3))),) %>%
dplyr::select(-V1...2, -V1...3)
## Warning: The `x` argument of `as_tibble.matrix()` must have column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## New names:
## * V1 -> V1...2
## * V1 -> V1...3
## # A tibble: 10 x 3
## Variables `Correlation (Tau)` `p value`
## <chr> <dbl> <chr>
## 1 one_over_medincome 0.322 < 0.0001
## 2 not_insured 0.236 < 0.0001
## 3 one_over_grocers_per_1000 0.268 < 0.0001
## 4 unemployed 0.294 < 0.0001
## 5 not_quarantined_jobs 0.542 < 0.0001
## 6 essentialworker_pubtrans 0.175 0.001
## 7 essentialworker_drove 0.483 < 0.0001
## 8 didnot_workhome_commute 0.453 < 0.0001
## 9 res_vol_popdensity 0.108 0.033
## 10 avg_hhold_size 0.463 < 0.0001
# Step 3: Prepare data for BWQS and pass to stan for model fitting
pm(fit_BWQS_model <- function(data_list, stan_model_path){
fitted_model <- stan(
file = stan_model_path,
data = data_list,
chains = 1,
warmup = 2500,
iter = 20000,
thin = 10,
refresh = 0,
algorithm = "NUTS",
seed = 1234,
control = list(max_treedepth = 20,
adapt_delta = 0.999999999999999))
return(fitted_model)
})
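# Bayesian R-squared for the negative binomial fit (in the spirit of Gelman et al. 2019):
# for each posterior draw, the variance of the fitted means is compared with the modeled
# residual variance (mean + mean^2/phi), and the distribution of
# var_fit / (var_fit + var_res) across draws is summarized.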
Compute_Bayes_R2 <- function(fit) {
y_pred = exp(extract(fit,"eta")$eta)
var_fit = apply(y_pred, 1, var)
mean_fit = apply(y_pred, 1, mean)
phi = extract(fit,"phi")$phi
var_res = mean_fit + (mean_fit^2)/phi
r2 = var_fit / (var_fit + var_res)
return(list(meanr2 = mean(r2),
medianr2 = median(r2),
lowerr2 = quantile(r2, .025),
upperr2 = quantile(r2, .975)))
}
prep_BWQS_data <- function(df, ses_varnames){
y = as.numeric(df$pos_per_100000)
X <- df %>%
dplyr::select(all_of(ses_varnames))
K <- ns(df$testing_ratio, df = 3)
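# K is a 3-df natural spline basis for the testing ratio covariate; below, each SES
# variable is replaced by 10 * its empirical CDF value, i.e. a 0-10 rank/decile-style
# score, so that all variables enter the BWQS mixture on a common scale.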
for (vname in ses_varnames)
X[[vname]] <- ecdf(X[[vname]])(X[[vname]]) * 10
data <-as.data.frame(cbind(y,X)) # Aggregate data in a data.frame
data_list = list(N = NROW(data),
N_new = NROW(data),
C = NCOL(X),
K = NCOL(K),
XC = cbind(as.matrix(X)),
XK = cbind(K),
XC_new = cbind(as.matrix(X)),
XK_new = cbind(K),
Dalp = rep(1,length(ses_varnames)),
y = as.vector(data$y))
return(list(data_list = data_list, X = X, K = K))
}
m1data <- prep_BWQS_data(ZCTA_ACS_COVID, SES_vars)
t_dataprep = Sys.time()
# fit our primary model -- negative binomial
m1 <- fit_BWQS_model(m1data$data_list, BWQS_stan_model)
t_m1 = Sys.time()
#### BWQS model diagnostics ####
# print m1 for model diagnostics (n_eff as % of 1750, Rhat, trace plots, acf)
# m1
min(summary(m1)$summary[,"n_eff"]/1750) # effective sample size is at worst ~69% of the 1750 retained draws
## [1] 0.6940459
traceplot(m1, pars = c("beta1", "W"))
stan_ac(m1, pars = c("beta1", "W"))
## Warning: Ignoring unknown parameters: fun.y
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
extract_waic(m1)
## $waic
## waic
## 2370.164
##
## $elpd_waic
## elpd_waic
## -1185.082
##
## $p_waic
## p_waic
## 15.05826
##
## $elpd_loo
## elpd_loo
## -1185.086
##
## $p_loo
## p_loo
## 15.06246
round(Compute_Bayes_R2(m1)$meanr2, 2)
## [1] 0.93
round(Compute_Bayes_R2(m1)$lowerr2, 2)
## 2.5%
## 0.92
round(Compute_Bayes_R2(m1)$upperr2, 2)
## 97.5%
## 0.95
# residual analysis with DHARMa
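# Posterior predictive draws (y_new) serve as the simulated responses, so DHARMa's
# scaled quantile residuals compare each observed infection count with the model's
# simulated distribution for that ZCTA; under a well-specified model these residuals
# are approximately Uniform(0, 1).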
residuals_qq <- createDHARMa(simulatedResponse = t(extract(m1,"y_new")$y_new), observedResponse = m1data$data_list$y)
## No fitted predicted response provided, using the mean of the simulations
#plotQQunif(residuals_qq, testOutliers = F, testDispersion = F) #base graphics
ks_unif <- testUniformity(residuals_qq)
residuals_qq_unif <- gap::qqunif(residuals_qq$scaledResiduals,pch=2,bty="n", logscale = F, col = "black", cex = 0.6, main = "QQ plot residuals", cex.main = 1)
sfig3_qq <- ggplot(data.frame(x = residuals_qq_unif$x, y = residuals_qq_unif$y)) +
geom_abline(linetype = "dashed", color = "blue") +
scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
geom_point(aes(x, y), size = 0.8) +
annotate("text", x = 0.02, y = 0.9, hjust = 0, size = 3.2,
label = paste0("Kolmogorov-Smirnov test for\ncomparison of distributions: p=",
round(ks_unif$p.value, 2))) +
coord_equal(ratio = 1) +
labs(x = "Expected", y = "Observed") +
theme_bw() + theme(plot.margin = unit(c(5.5, 11, 5.5, 5.5), "points"))
if(export.figs){
write_figs(sfig3_qq, "sfig3", fig_width = 3.5, fig_height = 3.4)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/sfig3_2021-06-17.png
write_csv(as_tibble(residuals_qq_unif), path = file.path(source_path, "sfig3.csv"))
# examine parameter estimates
exp(mean(extract(m1, "beta1")$beta1))
## [1] 1.077078
vars = c("phi", "beta0", "beta1", paste0("delta", 1:3), SES_vars)
parameters_to_drop <- c("phi", paste0("delta", 1:3), "beta0", "beta1")
number_of_coefficients <- length(vars)
BWQS_params <- bind_cols(as_tibble(summary(m1)$summary[1:number_of_coefficients,c(1,4,6,8:10)]), label = vars)
BWQS_params %>% filter(label == "beta1") %>% mutate_at(vars(1:3), ~exp(.))
## # A tibble: 1 x 7
## mean `2.5%` `50%` `97.5%` n_eff Rhat label
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1.08 1.07 1.08 0.0856 1706. 1.00 beta1
BWQS_fits <- BWQS_params %>%
rename(lower = "2.5%",
upper = "97.5%") %>%
filter(!label %in% parameters_to_drop) %>%
arrange(desc(mean)) %>%
mutate(group = factor(if_else(label == "one_over_medincome"|label =="not_insured"|label == "unemployed", "Finances &\nAccess to care",
if_else(label == "one_over_grocers_per_1000", "Food\nAccess",
if_else(str_detect(label, "essential")|label == "not_quarantined_jobs"|label=="didnot_workhome_commute", "Commuting and\nEssential Work",
if_else(label == "avg_hhold_size"|label == "res_vol_popdensity", "Population\nDensity", "Unmatched")))),
levels = c("Commuting and\nEssential Work", "Finances &\nAccess to care", "Population\nDensity", "Food\nAccess")))
labels1 <- c("one_over_medincome" = "1/\nMedian income",
"not_insured" = "Uninsured",
"unemployed" = "Unemployed",
"one_over_grocers_per_1000" = "1/\nGrocers per 1000",
"not_quarantined_jobs" = "Essential Workers",
"essentialworker_pubtrans" = "Essential Worker:\nPublic Transit",
"essentialworker_drove" = "Essential Worker:\nDriving Commute",
"didnot_workhome_commute" = "1/\nWork from home",
"res_vol_popdensity" = "Population Density\nby Residential Volume",
"avg_hhold_size" = "Average people\nper household")
labels2 <- c("phi" = "Overdispersion",
"beta0" = "Intercept",
"beta1" = "BWQS term",
"delta1" = "Testing ratio: spline term 1",
"delta2" = "Testing ratio: spline term 2",
"delta3" = "Testing ratio: spline term 3",
labels1)
# Supplemental Table 2: Parameter estimates, credible intervals, and diagnostics from main BWQS infections model
BWQS_params %>% bind_cols(., "terms" = labels2) %>%
dplyr::select(-label) %>%
mutate_at(vars(1:6), ~round(., 3)) %>%
mutate_at(vars(5), ~round(., 0)) %>%
mutate(`95% CrI`=paste0("(",format(`2.5%`, nsmall=3),", ",format(`97.5%`, nsmall=3),")")) %>%
mutate(terms =
case_when(terms=="BWQS term" ~ "COVID-19 inequity index",
TRUE ~ terms)) %>%
dplyr::select(-`2.5%`, -`97.5%`) %>%
rename(median=`50%`) %>%
dplyr::select(terms, mean, `95% CrI`, median, Rhat, n_eff) %>%
kbl(align=rep('r', 6), font_size = 9.5) %>%
kable_classic(full_width = F, html_font = "Arial")
| terms | mean | 95% CrI | median | Rhat | n_eff |
|---|---|---|---|---|---|
| Overdispersion | 104.050 | (81.448, 130.015) | 103.552 | 0.999 | 1636 |
| Intercept | 6.261 | (6.173, 6.352) | 6.261 | 1.001 | 1643 |
| COVID-19 inequity index | 0.074 | (0.063, 0.086) | 0.074 | 1.000 | 1706 |
| Testing ratio: spline term 1 | 0.984 | (0.900, 1.072) | 0.986 | 1.002 | 1905 |
| Testing ratio: spline term 2 | 2.029 | (1.809, 2.241) | 2.029 | 1.001 | 1661 |
| Testing ratio: spline term 3 | 1.123 | (1.015, 1.233) | 1.125 | 1.000 | 1792 |
| 1/ Median income | 0.098 | (0.009, 0.219) | 0.093 | 0.999 | 1590 |
| Uninsured | 0.181 | (0.067, 0.306) | 0.179 | 0.999 | 1496 |
| Unemployed | 0.026 | (0.001, 0.077) | 0.021 | 1.000 | 1509 |
| 1/ Grocers per 1000 | 0.053 | (0.002, 0.138) | 0.046 | 1.000 | 1766 |
| Essential Workers | 0.062 | (0.002, 0.181) | 0.050 | 1.000 | 1767 |
| Essential Worker: Public Transit | 0.050 | (0.002, 0.140) | 0.043 | 0.999 | 1534 |
| Essential Worker: Driving Commute | 0.167 | (0.033, 0.287) | 0.171 | 0.999 | 1690 |
| 1/ Work from home | 0.088 | (0.010, 0.196) | 0.083 | 1.002 | 1706 |
| Population Density by Residential Volume | 0.115 | (0.018, 0.218) | 0.113 | 1.000 | 1702 |
| Average people per household | 0.159 | (0.038, 0.293) | 0.159 | 1.000 | 1453 |
write_csv(BWQS_fits, file.path(source_path, "fig2.csv"))
# create a figure with parameter estimates for the weights
fig2 <- ggplot(data=BWQS_fits, aes(x= reorder(label, mean), y=mean, ymin=lower, ymax=upper)) +
geom_pointrange() +
coord_flip() +
xlab("") +
ylab("Variable Weight: Mean (95% credible intervals)") +
scale_x_discrete(labels = labels1) +
theme_bw(base_size = 18) +
facet_grid(group~., scales = "free", space = "free") +
theme(strip.text.x = element_text(size = 14))
if(export.figs){
write_figs(fig2, "fig2", fig_width = 8, fig_height = 8)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/fig2_2021-06-17.png
# Step 4: Construct the summative COVID-19 inequity index value for each ZCTA
# Apply the variable-specific weights to the decile-scored variables to create a 10-point ZCTA-level infection risk score
BWQS_weights <- as.numeric(summary(m1)$summary[(length(vars)-length(SES_vars) + 1):number_of_coefficients,c(1)])
ZCTA_ACS_COVID2 <- m1data$X * BWQS_weights[col(ZCTA_ACS_COVID)]
BWQS_index <- ZCTA_ACS_COVID2 %>%
dplyr::mutate(BWQS_index = rowSums(.)) %>%
dplyr::select(BWQS_index)
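# In other words, the index for ZCTA i is the weighted sum of its decile scores:
#   BWQS_index[i] = sum over variables k of W[k] * q[i, k],
# where q[i, k] = 10 * ECDF_k(x[i, k]) is the 0-10 score built in prep_BWQS_data()
# and W[k] is the posterior mean weight for variable k from m1.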
# Visualize the relationship between BWQS and test_ratio
ggplot(data.frame(BWQS_index = BWQS_index$BWQS_index, testing_ratio = ZCTA_ACS_COVID$testing_ratio), aes(testing_ratio, BWQS_index)) + geom_point() +
geom_smooth() + geom_smooth(color = "red", method = "lm")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# BWQS is correlated with testing_ratio
cor.test(BWQS_index$BWQS_index, ZCTA_ACS_COVID$testing_ratio, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: BWQS_index$BWQS_index and ZCTA_ACS_COVID$testing_ratio
## S = 473508, p-value = 2.703e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.487643
# Partial regression plot for BWQS index
# shows a nice linear relationship of BWQS index with infections after adjustment for testing
nb_testing_ns3df <- glm.nb(m1data$data_list$y ~ m1data$K)
yresid <- resid(nb_testing_ns3df)
bwqs_testing_ns3df <- lm(BWQS_index$BWQS_index ~ m1data$K)
bwqsresid <- resid(bwqs_testing_ns3df)
ggplot(data.frame(yresid, bwqsresid), aes(bwqsresid, yresid)) + geom_point() +
geom_smooth(formula = y ~ x, method = "lm", se = F) +
ylab("residual log infection rate\n(adjusted for testing)") + xlab("residual BWQS infection risk index\n(adjusted for testing)")
summary(lm(yresid ~ bwqsresid))
##
## Call:
## lm(formula = yresid ~ bwqsresid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.63000 -0.35184 0.01091 0.32491 1.93895
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05117 0.04792 -1.068 0.287
## bwqsresid 0.48676 0.02997 16.242 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6376 on 175 degrees of freedom
## Multiple R-squared: 0.6012, Adjusted R-squared: 0.5989
## F-statistic: 263.8 on 1 and 175 DF, p-value: < 2.2e-16
rm(nb_testing_ns3df, yresid, bwqs_testing_ns3df, bwqsresid)
# Visualize the full model observed vs predicted -- shows a close relationship (which is why R2 is so high)
preddf <- data.frame(data.frame(pred = colMeans(extract(m1,"y_new")$y_new), y = m1data$data_list$y))
ggplot(preddf, aes(pred, y)) +
geom_point() +
geom_abline(linetype = "dashed", color = "grey10") +
coord_fixed(ratio = 1) +
scale_x_continuous("Predicted Infections per 100,000", label=comma, limits = range(preddf)) +
scale_y_continuous("Infections per 100,000", label=comma) +
theme_bw()
rm(preddf)
# calculate credible interval over the mean predicted infections
# at the median testing_ratio
sim_out <- data.frame(extract(m1, pars = c("beta0", "beta1", "delta")))
median_testing <- predict(m1data$K, median(ZCTA_ACS_COVID$testing_ratio))
# calculate the spline term contribution at the median testing_ratio
sim_out$deltamedian <- with(sim_out, median_testing[1] * delta.1 +
median_testing[2] * delta.2 +
median_testing[3] * delta.3)
# make a sequence of BWQS values
xseqlength <- 300
bwqs_seq <- seq(from = min(BWQS_index$BWQS_index),
to = max(BWQS_index$BWQS_index),
length.out = xseqlength)
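# Each row of sim_matrix below is one posterior draw of the linear predictor (log scale)
# evaluated across the grid of index values, holding the testing-ratio spline at its
# median; exponentiating and summarizing the columns gives the mean curve and 95%
# credible band for predicted infections per 100,000.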
sim_matrix <- sim_out$beta0 %o% rep(1, xseqlength) +
sim_out$beta1 %o% bwqs_seq +
sim_out$deltamedian %o% rep(1, xseqlength)
sim_bwqs_df <- data.frame(bwqs_seq,
lower = exp(colQuantiles(sim_matrix, probs = 0.025)),
upper = exp(colQuantiles(sim_matrix, probs = 0.975)),
mean = exp(colMeans(sim_matrix)))
rm(median_testing, xseqlength, bwqs_seq, sim_matrix)
# calculate credible interval over the mean predicted infections
# at the median COVID-19 inequity index
sim_out$beta1median <- sim_out$beta1 * median(BWQS_index$BWQS_index)
# make a sequence of testing_ratio values
xseqlength <- 300
inequity_seq <- data.frame(x = seq(from = min(ZCTA_ACS_COVID$testing_ratio),
to = max(ZCTA_ACS_COVID$testing_ratio),
length.out = xseqlength))
inequity_seq <- data.frame(x = inequity_seq$x, t(sapply(inequity_seq$x, FUN = function(x) predict(m1data$K, x))))
sim_matrix <-
sim_out$beta0 %o% rep(1, xseqlength) +
sim_out$beta1median %o% rep(1, xseqlength) +
sim_out$delta.1 %o% inequity_seq$X1 +
sim_out$delta.2 %o% inequity_seq$X2 +
sim_out$delta.3 %o% inequity_seq$X3
sim_testing_df <- data.frame(x = inequity_seq$x,
lower = exp(colQuantiles(sim_matrix, probs = 0.025)),
upper = exp(colQuantiles(sim_matrix, probs = 0.975)),
mean = exp(colMeans(sim_matrix)))
rm(sim_out, xseqlength, inequity_seq, sim_matrix)
# Visualize the relationship between BWQS index and infection rate at the median testing_ratio
write_csv(sim_bwqs_df, file.path(source_path, "fig1_a.csv"))
write_csv(bind_cols(BWQS_index, y = m1data$data_list$y), file.path(source_path, "fig1_b.csv"))
BWQS_scatter <- ggplot(sim_bwqs_df, aes(x = bwqs_seq)) +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'grey75') +
geom_line(aes(y = mean)) +
geom_point(data = data.frame(BWQS_index, y = m1data$data_list$y), aes(BWQS_index, y), alpha = 0.5) +
scale_x_continuous("COVID-19 inequity index") +
scale_y_continuous("Infections per 100,000", label=comma) +
theme_bw(base_size = 18)
BWQS_scatter <- ggExtra::ggMarginal(BWQS_scatter, type = "histogram", fill = "grey40", margins = "both",
xparams = list(binwidth = 0.5), yparams = list(binwidth = 200))
if(export.figs) {
write_figs(BWQS_scatter, "fig1", fig_width = 96*5, fig_height = 96*5)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/fig1_2021-06-17.png
# Visualize the relationship between testing_ratio and infection rate at the median BWQS
write_csv(sim_testing_df, file.path(source_path, "sfig2.csv"))
testing_scatter <- ggplot(sim_testing_df, aes(x = x)) +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = 'grey75') +
geom_line(aes(y = mean)) +
geom_point(data = data.frame(testing_ratio = ZCTA_ACS_COVID$testing_ratio, y = m1data$data_list$y),
aes(testing_ratio, y), alpha = 0.5) +
scale_x_continuous("Testing ratio") +
scale_y_continuous("Infections per 100,000", label=comma) +
theme_bw(base_size = 18)
testing_scatter <- ggExtra::ggMarginal(testing_scatter, type = "histogram", fill = "grey40", margins = "both",
xparams = list(binwidth = 0.005), yparams = list(binwidth = 200))
if(export.figs) {
write_figs(testing_scatter, "sfig2", fig_width = 96*5, fig_height = 96*5)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/sfig2_2021-06-17.png
# Step 5: Visualize the spatial distribution of ZCTA-level infection risk scores
ZCTA_BWQS_COVID_shp <- ZCTA_ACS_COVID_shp %>%
bind_cols(., BWQS_index)
# reproject to WGS84 to be compatible with scalebar
ZCTA_BWQS_COVID_shp <- st_transform(ZCTA_BWQS_COVID_shp, 4326)
write_csv(st_drop_geometry(ZCTA_BWQS_COVID_shp %>% dplyr::select(zcta, BWQS_index)), file.path(source_path, "fig3a.csv"))
# Figure 3A - BWQS Index by ZCTA
fig3a <- ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
geom_sf(data = ZCTA_BWQS_COVID_shp, aes(fill = BWQS_index), lwd = 0.2) +
scalebar(ZCTA_BWQS_COVID_shp, dist = 5, dist_unit = "km",
transform = TRUE, model = "WGS84",
st.size = 2.8, height = 0.015, border.size = 0.5, st.dist = 0.011,
anchor = c(x = -73.71, y = 40.495)) +
scale_fill_gradientn(colours=brewer_pal("YlGnBu", type = "seq")(7)) +
coord_sf(crs = st_crs(ZCTA_BWQS_COVID_shp),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
theme_bw(base_size = 6) +
labs(fill = "COVID-19 Inequity Index") +
annotate("text", x = -73.709, y = 40.48, label = "water mask © OpenStreetMap contributors",
size = theme_get()$text[["size"]]/4, hjust = "right") +
theme(legend.title = element_text(face = "bold", size = 9),
panel.background = element_rect(fill = "#cccccc"),
legend.background = element_rect(fill = "transparent"),
panel.grid = element_blank(),
legend.position = c(0.125, 0.90),
legend.text = element_text(size = 8.5),
legend.key.size = unit(1.1, "lines"),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank())
# Step 6: Compare the distribution of ZCTA-level BWQS scores across racial/ethnic groups (population-weighted)
Demographics <- ACS_Data_scaled %>%
dplyr::select(GEOID, ends_with("_raceethnic"), total_pop1) %>%
left_join(., modzcta_to_zcta1, by = c("GEOID" = "ZCTA")) %>%
group_by(MODZCTA) %>%
summarise_at(vars(ends_with("_raceethnic"), total_pop1), ~sum(.)) %>%
mutate(zcta = as.character(MODZCTA),
other_raceethnic = total_pop1 - (hisplat_raceethnic + nonhispLat_white_raceethnic + nonhispLat_black_raceethnic +
nonhispLat_amerindian_raceethnic + nonhispLat_asian_raceethnic))
ZCTA_BQWS <- ZCTA_BWQS_COVID_shp %>%
st_set_geometry(., NULL) %>%
dplyr::select(zcta, BWQS_index)
Demographics_for_ridges <- Demographics %>%
left_join(., ZCTA_BQWS, by = "zcta") %>%
dplyr::select(-total_pop1) %>%
gather(key = "Race/Ethnicity", value = "Population", hisplat_raceethnic:other_raceethnic) %>%
mutate(`Race/Ethnicity` = if_else(`Race/Ethnicity`=="hisplat_raceethnic","Hispanic, LatinX",
if_else(`Race/Ethnicity`=="nonhispLat_black_raceethnic", "Black",
if_else(`Race/Ethnicity`=="nonhispLat_white_raceethnic", "white",
if_else(`Race/Ethnicity`== "nonhispLat_asian_raceethnic", "Asian", "other")))),
`Race/Ethnicity` = factor(`Race/Ethnicity`, levels = c( "white", "Asian", "other","Hispanic, LatinX","Black")),
Population = as.numeric(Population))
(Demographics_for_ridges %>%
group_by(`Race/Ethnicity`) %>%
summarise(weighted.mean(BWQS_index, Population),
weightedMedian(BWQS_index, Population)))
## # A tibble: 5 x 3
## `Race/Ethnicity` `weighted.mean(BWQS_index, Po… `weightedMedian(BWQS_index, P…
## <fct> <dbl> <dbl>
## 1 white 4.34 4.69
## 2 Asian 5.48 6.20
## 3 other 5.14 5.66
## 4 Hispanic, LatinX 6.10 6.61
## 5 Black 6.26 6.51
write_csv(Demographics_for_ridges, file.path(source_path, "fig4.csv"))
fig4 <- ggplot(Demographics_for_ridges,
aes(x = BWQS_index, y = `Race/Ethnicity`)) +
xlab("COVID-19 inequity index")+
geom_density_ridges(
aes(height = ..density..,
weight = Population / sum(Population)),
scale = 0.95,
stat ="density") +
scale_y_discrete(expand = c(0, 0)) +
expand_limits(y = 6) +
theme_bw(base_size = 18) +
theme(legend.position = "none")
if(export.figs){
write_figs(fig4, "fig4", fig_width = 8, fig_height = 5)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/fig4_2021-06-17.png
Below_25th_zctas <- ZCTA_BQWS %>%
filter(BWQS_index<quantile(BWQS_index, .25))
Race_Ethncity_below25th <- Demographics %>%
filter(zcta %in% Below_25th_zctas$zcta) %>%
dplyr::select(-zcta, -MODZCTA) %>%
summarise_all(funs(sum(.))) %>%
mutate_at(vars(ends_with("_raceethnic")), funs(round((./total_pop1)*100, 2))) %>%
mutate(Group = "Below 25th percentile BWQS")
Between_25thand75th_zctas <- ZCTA_BQWS %>%
filter(BWQS_index>quantile(BWQS_index, .25) & BWQS_index<quantile(BWQS_index, .75))
Race_Ethncity_btw_25th75th <- Demographics %>%
filter(zcta %in% Between_25thand75th_zctas$zcta) %>%
dplyr::select(-zcta, -MODZCTA) %>%
summarise_all(funs(sum(.))) %>%
mutate_at(vars(ends_with("_raceethnic")), funs(round((./total_pop1)*100, 2)))%>%
mutate(Group = "Between 25-75th percentile BWQS")
Above_75th_zctas <- ZCTA_BQWS %>%
filter(BWQS_index>quantile(BWQS_index, .75))
Race_Ethncity_above_75th <- Demographics %>%
filter(zcta %in% Above_75th_zctas$zcta) %>%
dplyr::select(-zcta, -MODZCTA) %>%
summarise_all(funs(sum(.))) %>%
mutate_at(vars(ends_with("_raceethnic")), funs(round((./total_pop1)*100, 2))) %>%
mutate(Group = "Above 75th percentile BWQS")
Race_Ethncity_all <- Demographics %>%
dplyr::select(-zcta, -MODZCTA) %>%
summarise_all(funs(sum(.))) %>%
mutate_at(vars(ends_with("_raceethnic")), funs(round((./total_pop1)*100, 2)))%>%
mutate(Group = "NYC Population")
Demographics_by_BWQS <- bind_rows(Race_Ethncity_all, Race_Ethncity_below25th, Race_Ethncity_btw_25th75th, Race_Ethncity_above_75th) %>%
mutate(other = other_raceethnic + nonhispLat_amerindian_raceethnic) %>%
dplyr::select(-other_raceethnic, -nonhispLat_amerindian_raceethnic, - total_pop1) %>%
rename("Hispanic, LatinX" = "hisplat_raceethnic",
"Black" = "nonhispLat_black_raceethnic",
"white" = "nonhispLat_white_raceethnic",
"Asian" = "nonhispLat_asian_raceethnic") %>%
gather(., "Race/Ethnicity", "Proportion", 1:4,6) %>%
mutate(`Race/Ethnicity` = factor(`Race/Ethnicity`, levels = c("other", "Asian", "white", "Hispanic, LatinX", "Black")),
Group = factor(Group, levels = c("NYC Population", "Below 25th percentile BWQS", "Between 25-75th percentile BWQS", "Above 75th percentile BWQS")))
# axis labels with line breaks for the grouped bar chart
labels_demographics <- c("NYC Population" = "NYC Population", "Below 25th percentile BWQS" = "Below 25th\n%ile index",
"Between 25-75th percentile BWQS" = "Between 25-75th\n%ile index", "Above 75th percentile BWQS" = "Above 75th\n%ile index")
write_csv(Demographics_by_BWQS, file.path(source_path, "sfig4.csv"))
sfig4 <- ggplot(Demographics_by_BWQS, aes(fill=`Race/Ethnicity`, y=Proportion, x=Group)) +
geom_rect(data = subset(Demographics_by_BWQS, Group=="NYC Population"),
aes(xmin=as.numeric(Group)-.35,xmax=as.numeric(Group)+.35, ymin=0, ymax=100, fill="gray85"), color = "gray", alpha = .1) +
geom_bar(data = subset(Demographics_by_BWQS, Group!="NYC Population"), position="stack", stat="identity", width = .75) +
geom_bar(data = subset(Demographics_by_BWQS, Group=="NYC Population"), position="stack", stat="identity", width = .45) +
scale_fill_manual(breaks = c("other", "Asian", "white", "Hispanic, LatinX", "Black"),
values = c("#984ea3","#ff7f00","gray85", "#4daf4a", "#e41a1c", "#377eb8")) +
geom_text(aes(label=ifelse(Proportion >= 5, paste0(sprintf("%.0f", Proportion),"%"),"")),
position=position_stack(vjust=0.5), colour="black", size = 8) +
scale_y_continuous("Percentage", expand = c(0,0), labels = function(x) paste0(x, "%")) +
scale_x_discrete(limits = c( "NYC Population", "Below 25th percentile BWQS", "Between 25-75th percentile BWQS", "Above 75th percentile BWQS"),
labels = labels_demographics) +
theme_bw(base_size = 16) +
theme(legend.text = element_text(size = 14),
axis.text.y = element_text(size = 16),
axis.text.x = element_text(size = 16, color = "black"),
axis.title.x = element_blank())
if(export.figs){
write_figs(sfig4, "sfig4", fig_width = 12, fig_height = 6)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/sfig4_2021-06-17.png
t_postm1 = Sys.time()
#### BWQS by Tract ####
get_tract_vars_by_zcta <- function(tract_vars, modzcta_tract_crosswalk, whichq){
whichq = str_to_lower(whichq)
if(whichq %in% c("median", "q2", "2q", "2")){
qname = "median"
qnum = 2
} else if(whichq %in% c("q3", "3q", "3")){
qname = "3Q"
qnum = 3
} else { stop("unhandled quartile selected: ", whichq)}
ses_zcta <- tract_vars %>%
st_set_geometry(., NULL) %>%
left_join(., modzcta_tract_crosswalk, by = c("GEOID" = "TRACT")) %>%
filter(!is.na(MODZCTA)) %>%
group_by(MODZCTA) %>%
summarise(essentialworker_drove_rename = Hmisc::wtd.quantile(essentialworker_drove, W_RES_RATIO)[[qnum]],
essentialworker_pubtrans_rename = Hmisc::wtd.quantile(essentialworker_pubtrans, W_RES_RATIO)[[qnum]],
not_quarantined_jobs_rename = Hmisc::wtd.quantile(not_quarantined_jobs, W_RES_RATIO)[[qnum]],
didnot_workhome_commute_rename = Hmisc::wtd.quantile(didnot_workhome_commute, W_RES_RATIO)[[qnum]],
not_insured_rename = Hmisc::wtd.quantile(not_insured, W_RES_RATIO)[[qnum]],
one_over_medincome_rename = Hmisc::wtd.quantile(one_over_medincome, W_RES_RATIO)[[qnum]],
unemployed_rename = Hmisc::wtd.quantile(unemployed, W_RES_RATIO)[[qnum]],
avg_hhold_size_rename = Hmisc::wtd.quantile(avg_hhold_size, W_RES_RATIO)[[qnum]],
res_vol_popdensity_rename = Hmisc::wtd.quantile(res_vol_popdensity, W_RES_RATIO)[[qnum]],
one_over_grocers_per_1000_rename = Hmisc::wtd.quantile(one_over_grocers_per_1000, W_RES_RATIO)[[qnum]])
ses_zcta %>% rename_with(~ gsub("rename", qname, .x))
}
# Select median tract values by MODZCTA, weighted by residential address share and population density by tract
SES_zcta_median <- get_tract_vars_by_zcta(tract_vars, modzcta_to_tract2, "median")
SES_vars_median = names(SES_zcta_median)[names(SES_zcta_median) != "MODZCTA"]
# join SES to testing data: positive/100k and testing_ratio
SES_zcta_median_testing <- ZCTA_ACS_COVID %>%
dplyr::select(zcta, pos_per_100000, testing_ratio) %>%
left_join(SES_zcta_median, by = c("zcta" = "MODZCTA"))
Warnings about undefined values in log_lik[1:177] and y_new[1:177] have been hidden for the two models below that use tract-level data. Undefined values arose early in the warmup period and would otherwise print a long list of warnings, but they do not affect the post-warmup posterior draws (as confirmed by the trace plots and the potential scale-reduction statistics, R-hat).
# BWQS using median at the tract level
m2data_median <- prep_BWQS_data(SES_zcta_median_testing, SES_vars_median)
m2_median <- fit_BWQS_model(data_list = m2data_median$data_list, stan_model_path = BWQS_stan_model)
# Check for NA or Inf values in the post-warmup draws
m2_thinned_draws = m2_median@sim$samples[[1]]
m2_args = m2_median@stan_args[[1]]
m2_post_draws = (m2_args$warmup/m2_args$thin +1):(m2_args$iter/m2_args$thin)
range(m2_post_draws)
## [1] 251 2000
m2_check = sapply(m2_thinned_draws, function(pv) any(is.na(pv[m2_post_draws])) | any(is.infinite(pv[m2_post_draws])))
# Do any post-warmup draws have NA or Inf values?
any(m2_check)
## [1] FALSE
traceplot(m2_median, pars = c("beta1", "W"))
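In addition to the trace plots, convergence can be checked programmatically; a minimal sketch using the m2_median fit above (the 1.05 cut-off is a common rule of thumb rather than part of the original analysis):
all(summary(m2_median)$summary[, "Rhat"] < 1.05, na.rm = TRUE)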
Parameter estimates, credible intervals, and diagnostics from model using median tract values by MODZCTA
BWQS_params_m2 <- bind_cols(as_tibble(summary(m2_median)$summary[1:number_of_coefficients,c(1,4,6,8:10)]), label = vars)
BWQS_params_m2 %>% bind_cols(., "terms" = labels2) %>%
dplyr::select(-label) %>%
mutate_at(vars(1:6), ~round(., 3)) %>%
mutate_at(vars(5), ~round(., 0)) %>%
mutate(`95% CrI`=paste0("(",format(`2.5%`, nsmall=3),", ",format(`97.5%`, nsmall=3),")")) %>%
mutate(terms =
case_when(terms=="BWQS term" ~ "COVID-19 inequity index",
TRUE ~ terms)) %>%
dplyr::select(-`2.5%`, -`97.5%`) %>%
rename(median=`50%`) %>%
dplyr::select(terms, mean, `95% CrI`, median, Rhat, n_eff) %>%
kbl(align=rep('r', 6), font_size = 9.5) %>%
kable_classic(full_width = F, html_font = "Arial")
terms | mean | 95% CrI | median | Rhat | n_eff |
---|---|---|---|---|---|
Overdispersion | 107.824 | (84.252, 134.502) | 107.535 | 1.004 | 1799 |
Intercept | 6.259 | ( 6.175, 6.342) | 6.259 | 1.000 | 1853 |
COVID-19 inequity index | 0.079 | ( 0.067, 0.091) | 0.079 | 1.000 | 1803 |
Testing ratio: spline term 1 | 0.950 | ( 0.865, 1.039) | 0.950 | 1.000 | 1451 |
Testing ratio: spline term 2 | 1.995 | ( 1.780, 2.213) | 1.995 | 0.999 | 1897 |
Testing ratio: spline term 3 | 1.147 | ( 1.034, 1.257) | 1.148 | 0.999 | 1776 |
1/ Median income | 0.141 | ( 0.026, 0.248) | 0.142 | 1.000 | 1879 |
Uninsured | 0.062 | ( 0.004, 0.147) | 0.057 | 1.000 | 1741 |
Unemployed | 0.064 | ( 0.002, 0.178) | 0.054 | 0.999 | 1563 |
1/ Grocers per 1000 | 0.056 | ( 0.003, 0.141) | 0.051 | 1.000 | 1863 |
Essential Workers | 0.186 | ( 0.087, 0.289) | 0.185 | 0.999 | 1950 |
Essential Worker: Public Transit | 0.090 | ( 0.010, 0.186) | 0.089 | 1.000 | 1786 |
Essential Worker: Driving Commute | 0.046 | ( 0.002, 0.122) | 0.041 | 1.000 | 1761 |
1/ Work from home | 0.286 | ( 0.190, 0.389) | 0.287 | 1.001 | 1686 |
Population Density by Residential Volume | 0.040 | ( 0.002, 0.108) | 0.035 | 1.000 | 1793 |
Average people per household | 0.028 | ( 0.001, 0.072) | 0.025 | 1.001 | 1722 |
t_m2 = Sys.time()
# Select third quartile tract values by MODZCTA, weighted by residential address share and population density by tract
SES_zcta_3q <- get_tract_vars_by_zcta(tract_vars, modzcta_to_tract2, "3q")
SES_vars_3q = names(SES_zcta_3q)[names(SES_zcta_3q) != "MODZCTA"]
SES_zcta_3q_testing <- ZCTA_ACS_COVID %>%
dplyr::select(zcta, pos_per_100000, testing_ratio) %>%
left_join(SES_zcta_3q, by = c("zcta" = "MODZCTA"))
#BWQS using the 3rd quartile at the tract level
m2data_3q <- prep_BWQS_data(SES_zcta_3q_testing, SES_vars_3q)
m2_3q <- fit_BWQS_model(data_list = m2data_3q$data_list, stan_model_path = BWQS_stan_model)
# Check for NA or Inf values in the post-warmup draws
m2_3q_thinned_draws = m2_3q@sim$samples[[1]]
m2_3q_args = m2_3q@stan_args[[1]]
m2_3q_post_draws = (m2_3q_args$warmup/m2_3q_args$thin +1):(m2_3q_args$iter/m2_3q_args$thin)
range(m2_3q_post_draws)
## [1] 251 2000
m2_3q_check = sapply(m2_3q_thinned_draws, function(pv) any(is.na(pv[m2_3q_post_draws])) | any(is.infinite(pv[m2_3q_post_draws])))
# Do any post-warmup draws have NA or Inf values?
any(m2_3q_check)
## [1] FALSE
traceplot(m2_3q, pars = c("beta1", "W"))
Parameter estimates, credible intervals, and diagnostics from model using third quartile tract values by MODZCTA
BWQS_params_m2_3q <- bind_cols(as_tibble(summary(m2_3q)$summary[1:number_of_coefficients,c(1,4,6,8:10)]), label = vars)
BWQS_params_m2_3q %>% bind_cols(., "terms" = labels2) %>%
dplyr::select(-label) %>%
mutate_at(vars(1:6), ~round(., 3)) %>%
mutate_at(vars(5), ~round(., 0)) %>%
mutate(`95% CrI`=paste0("(",format(`2.5%`, nsmall=3),", ",format(`97.5%`, nsmall=3),")")) %>%
mutate(terms =
case_when(terms=="BWQS term" ~ "COVID-19 inequity index",
TRUE ~ terms)) %>%
dplyr::select(-`2.5%`, -`97.5%`) %>%
rename(median=`50%`) %>%
dplyr::select(terms, mean, `95% CrI`, median, Rhat, n_eff) %>%
kbl(align=rep('r', 6), font_size = 9.5) %>%
kable_classic(full_width = F, html_font = "Arial")
terms | mean | 95% CrI | median | Rhat | n_eff |
---|---|---|---|---|---|
Overdispersion | 109.752 | (85.968, 137.586) | 109.024 | 1.001 | 1689 |
Intercept | 6.254 | ( 6.172, 6.341) | 6.255 | 1.000 | 1491 |
COVID-19 inequity index | 0.081 | ( 0.069, 0.093) | 0.081 | 0.999 | 1725 |
Testing ratio: spline term 1 | 0.948 | ( 0.861, 1.033) | 0.947 | 1.002 | 1778 |
Testing ratio: spline term 2 | 1.987 | ( 1.774, 2.187) | 1.988 | 1.000 | 1666 |
Testing ratio: spline term 3 | 1.157 | ( 1.057, 1.264) | 1.157 | 1.002 | 1654 |
1/ Median income | 0.180 | ( 0.066, 0.280) | 0.183 | 0.999 | 1592 |
Uninsured | 0.090 | ( 0.009, 0.181) | 0.090 | 1.000 | 1717 |
Unemployed | 0.048 | ( 0.001, 0.141) | 0.038 | 1.000 | 1717 |
1/ Grocers per 1000 | 0.067 | ( 0.005, 0.155) | 0.062 | 1.000 | 1942 |
Essential Workers | 0.180 | ( 0.085, 0.274) | 0.178 | 0.999 | 1933 |
Essential Worker: Public Transit | 0.078 | ( 0.006, 0.171) | 0.075 | 1.002 | 1745 |
Essential Worker: Driving Commute | 0.036 | ( 0.001, 0.101) | 0.031 | 1.000 | 1674 |
1/ Work from home | 0.248 | ( 0.154, 0.345) | 0.247 | 0.999 | 1663 |
Population Density by Residential Volume | 0.050 | ( 0.003, 0.122) | 0.046 | 0.999 | 1811 |
Average people per household | 0.023 | ( 0.001, 0.065) | 0.020 | 1.000 | 1549 |
t_m2_3q = Sys.time()
Summarize_BWQS_performance <- function(model, df){
model_type = deparse(substitute(model))
waic = extract_waic(model)$waic[[1]]
bayesr2 = Compute_Bayes_R2(model)$meanr2
rmse = sqrt(mean((df$pos_per_100000 - colMeans(extract(model,"y_new")$y_new))^2))
effect_estimate = exp(mean(extract(model, "beta1")$beta1))
summarized <- bind_cols(model_name = as.character(model_type), waic = waic, bayesr2 = bayesr2, rmse = rmse, effect_estimate = effect_estimate)
return(summarized)
}
bind_rows(Summarize_BWQS_performance(m1, SES_zcta_median_testing),
Summarize_BWQS_performance(m2_median, SES_zcta_median_testing),
Summarize_BWQS_performance(m2_3q, SES_zcta_3q_testing))
## # A tibble: 3 x 5
## model_name waic bayesr2 rmse effect_estimate
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 m1 2370. 0.934 184. 1.08
## 2 m2_median 2362. 0.936 183. 1.08
## 3 m2_3q 2359. 0.937 183. 1.08
#### Part 3: Spatial analysis of mortality in relation to BWQS scores ####
# Step 1: Create dataframes with the relevant information
deaths_by23May2020_by_zcta1 <- deaths_by23May2020_by_zcta %>%
left_join(., modzcta_to_zcta, by = c("MODIFIED_ZCTA" = "MODZCTA")) %>%
mutate(zcta = as.character(MODIFIED_ZCTA)) %>%
rename("deaths_count" = "COVID_DEATH_COUNT") %>%
dplyr::select(zcta, deaths_count, COVID_DEATH_RATE) %>%
distinct(zcta, .keep_all = T)
ZCTA_BWQS_COVID_shp1 <- ZCTA_ACS_COVID_shp %>%
left_join(.,ZCTA_BWQS_score, by = "zcta") %>%
left_join(., deaths_by23May2020_by_zcta1, by = "zcta") %>%
mutate(prop_65plus = age65_plus/total_pop1,
zcta = as.numeric(zcta))
write_csv(st_drop_geometry(ZCTA_BWQS_COVID_shp1 %>% dplyr::select(zcta, COVID_DEATH_RATE)), file.path(source_path, "fig3c.csv"))
# Figure 3C - Mortality
fig3c <- ZCTA_BWQS_COVID_shp1 %>%
ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
geom_sf(aes(fill = COVID_DEATH_RATE), lwd = 0.2)+
scalebar(MODZCTA_NYC_shp1, dist = 5, dist_unit = "km",
transform = TRUE, model = "WGS84",
st.size = 2.8, height = 0.015, border.size = 0.5,
anchor = c(x = -73.71, y = 40.51)) +
labs(fill = "Mortality per 100,000") +
ggtitle("Cumulative COVID mortality by zip code (May 23, 2020)") +
scale_fill_gradientn(colours=brewer_pal("YlGnBu", type = "seq")(7)) +
coord_sf(crs = st_crs(ZCTA_BWQS_COVID_shp),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
theme_bw(base_size = 6) +
theme_smallmaps
# Combine subfigures to make Figure 3: maps of BWQS, Tests, and Mortality by ZCTA
fig3a_2 = fig3a + labs(tag = "a") + theme(plot.tag.position = c(-0.018, 0.985), plot.tag = element_text(face = "bold", size = 15))
fig3b_2 = fig3b + labs(tag = "b") + theme(plot.tag.position = c(-0.028, 0.93), plot.tag = element_text(face = "bold", size = 15))
fig3c_2 = fig3c + labs(tag = "c") + theme(plot.tag.position = c(-0.028, 0.93), plot.tag = element_text(face = "bold", size = 15))
plotres = 300
if(export.figs) {
if(vector.figs){
fig3_path = file.path(fig.path, paste0("fig3_combined_", Sys.Date(), ".svg"))
svg(filename = fig3_path, width = 1.9*4.25, height = 1.9*6)
} else {
agg_png(filename = file.path(fig.path, paste0("fig3_combined_", Sys.Date(), ".png")),
width = plotres*4.25, height = plotres*6, scaling = 2)
}
grid.arrange(arrangeGrob(fig3a_2, ncol = 1, nrow = 1),
arrangeGrob(fig3b_2, fig3c_2, ncol = 2, nrow = 1),
nrow = 2, ncol = 1, heights = c(1.9,1))
dev.off()
if(vector.figs){
system(paste0("rsvg-convert -f pdf -o ", str_sub(fig3_path,1,-5), ".pdf", " ", fig3_path))
unlink(fig3_path)
}
}
# Step 2: Run negative binomial model with spatial filtering
# Step 2a: Identify the neighbor relationships
spdat.sens <- as_Spatial(ZCTA_BWQS_COVID_shp1)
ny.nb6 <- knearneigh(sp::coordinates(spdat.sens), k=6)
ny.nb6 <- knn2nb(ny.nb6)
ny.nb6 <- make.sym.nb(ny.nb6)
ny.wt6 <- nb2listw(ny.nb6, style="W")
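# ny.wt6: row-standardised spatial weights built from symmetrised 6-nearest-neighbour links between ZCTA centroids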
# Step 2b: Fit the model to identify the component of the data with substantial spatial autocorrelation
fit.nb.ny.sens<-glm.nb(deaths_count~offset(log(total_pop1))+BWQS_index, spdat.sens)
lm.morantest(fit.nb.ny.sens, listw = ny.wt6)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = deaths_count ~ offset(log(total_pop1)) +
## BWQS_index, data = spdat.sens, init.theta = 6.644443327, link = log)
## weights: ny.wt6
##
## Moran I statistic standard deviate = 3.2076, p-value = 0.0006692
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.126081335 -0.001798209 0.001589415
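# spatialreg::ME() performs Moran eigenvector filtering: it selects eigenvectors of the spatial
# weights matrix to include as synthetic covariates, continuing until the permutation-based
# probability of residual spatial autocorrelation, pr(ZI), exceeds alpha = 0.1
# (eigenvectors 16 and 23 are selected here; see output below)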
me.fit.sens <- spatialreg::ME(deaths_count~offset(log(total_pop1))+BWQS_index,
spdat.sens@data, family=negative.binomial(fit.nb.ny.sens$theta), listw = ny.wt6, verbose=T, alpha=.1, nsim = 999)
## eV[,16], I: 0.05661728 ZI: NA, pr(ZI): 0.06
## eV[,23], I: 0.01195511 ZI: NA, pr(ZI): 0.297
# Step 2c: Pull out these fits and visualize the autocorrelation
fits.sens <- data.frame(fitted(me.fit.sens))
spdat.sens$me16 <- fits.sens$vec16
spdat.sens$me23 <- fits.sens$vec23
spplot(spdat.sens, "me16", at=quantile(spdat.sens$me16, p=seq(0,1,length.out = 7)))
spplot(spdat.sens, "me23", at=quantile(spdat.sens$me23, p=seq(0,1,length.out = 7)))
# Step 2d: Include the fits in our regression model as an additional parameter
clean.nb.sens<-glm.nb(deaths_count~offset(log(total_pop1))+BWQS_index+fitted(me.fit.sens), spdat.sens@data)
tidy(clean.nb.sens) %>% mutate(estimate_exp = exp(estimate))
## # A tibble: 4 x 6
## term estimate std.error statistic p.value estimate_exp
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.29 0.0842 -86.6 0. 0.000681
## 2 BWQS_index 0.180 0.0151 11.9 8.78e-33 1.20
## 3 fitted(me.fit.sens)vec16 -0.922 0.384 -2.40 1.64e- 2 0.398
## 4 fitted(me.fit.sens)vec23 -1.78 0.383 -4.65 3.25e- 6 0.169
as_tibble(confint(clean.nb.sens), rownames = "vars")%>% mutate_at(vars(2:3), .funs = list(~exp(.)))
## Waiting for profiling to be done...
## # A tibble: 4 x 3
## vars `2.5 %` `97.5 %`
## <chr> <dbl> <dbl>
## 1 (Intercept) 0.000576 0.000807
## 2 BWQS_index 1.16 1.23
## 3 fitted(me.fit.sens)vec16 0.188 0.841
## 4 fitted(me.fit.sens)vec23 0.0785 0.361
lm.morantest(clean.nb.sens, resfun = residuals, listw=ny.wt6)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = deaths_count ~ offset(log(total_pop1)) +
## BWQS_index + fitted(me.fit.sens), data = spdat.sens@data, init.theta =
## 7.95088153, link = log)
## weights: ny.wt6
##
## Moran I statistic standard deviate = 1.6812, p-value = 0.04636
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.065068136 -0.002561584 0.001618272
spdat.sens$nb_residuals <- clean.nb.sens$residuals
spplot(spdat.sens, "nb_residuals", at=quantile(spdat.sens$nb_residuals, p=seq(0,1,length.out = 10)))
pal_sfig10 <- brewer_pal(type = "div", palette = "RdYlBu")(5)[2:5]
pal_sfig10[[2]] <- "#fafadf" # make yellow less saturated
write_csv(bind_cols(zcta = spdat.sens[["zcta"]], nb_residuals = spdat.sens[["nb_residuals"]]), file.path(source_path, "sfig10.csv"))
sfig10 <- ggplot() +
geom_sf(data = basemap_water, fill = "white", lwd = 0) +
geom_sf(data = st_as_sf(spdat.sens), aes(fill = cut_width(nb_residuals, width = 1)), lwd = 0.2) +
scale_fill_discrete(type = pal_sfig10) +
coord_sf(crs = st_crs(MODZCTA_NYC_shp1),
xlim = c(plot_bounds$xmin, plot_bounds$xmax),
ylim = c(plot_bounds$ymin, plot_bounds$ymax),
expand = FALSE) +
labs(fill = "Residuals\n(Standard Deviations)") +
annotate("text", x = -73.709, y = 40.48, label = "water mask © OpenStreetMap contributors",
size = theme_get()$text[["size"]]/6, hjust = "right") +
theme_bw() +
theme(panel.background = element_rect(fill = "#cccccc"),
panel.grid = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.title = element_text(face = "bold", size = 9),
legend.text = element_text(size = 8.5),
plot.margin = unit(c(0.1,0.1,0.1,0.1), "in")
)
if(export.figs){
write_figs(sfig10, "sfig10", fig_width = 5, fig_height = 3.2)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/sfig10_2021-06-17.png
#### Sensitivity Analyses ####
# half-Cauchy prior for overdispersion parameter
BWQS_NB_halfcauchy_stan_model <- here("code", "nb_bwqs_halfcauchy.stan")
m1_halfcauchy <- fit_BWQS_model(m1data$data_list, BWQS_NB_halfcauchy_stan_model)
# beta1 effect estimate using half-cauchy prior for overdispersion
exp(summary(m1_halfcauchy)$summary[3,c(1,4,8)])
## mean 2.5% 97.5%
## 1.076923 1.065495 1.088503
# main model (inverse gamma prior for overdispersion)
exp(summary(m1)$summary[3,c(1,4,8)])
## mean 2.5% 97.5%
## 1.077078 1.065505 1.089368
# no meaningful difference in effect estimate or credible interval!
# BWQS weights -- stability of the rankings
as_tibble(extract(m1, c("W[1]","W[2]", "W[3]", "W[4]", "W[5]", "W[6]", "W[7]", "W[8]", "W[9]", "W[10]"))) %>%
rownames_to_column("iteration") %>%
gather(weight, value, "W[1]":"W[10]") %>%
group_by(iteration) %>%
slice(which.max(value)) %>%
ungroup() %>%
group_by(weight) %>%
dplyr::summarise(times_largest_weight = n()) %>%
mutate(pct_largest_weight = round((times_largest_weight/1750)*100, 2)) # denominator = 1,750 retained post-warmup draws
## # A tibble: 9 x 3
## weight times_largest_weight pct_largest_weight
## <chr> <int> <dbl>
## 1 W[1] 72 4.11
## 2 W[10] 455 26
## 3 W[2] 561 32.1
## 4 W[4] 5 0.290
## 5 W[5] 13 0.74
## 6 W[6] 1 0.06
## 7 W[7] 523 29.9
## 8 W[8] 48 2.74
## 9 W[9] 72 4.11
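# Across the 1,750 post-warmup draws, W[2], W[7], and W[10] are each the largest weight in
# roughly a quarter to a third of draws (together ~88%), while W[3] is never the largest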
# using essential workers, testing ratio, and median income
glm_esstl_and_income <- glm.nb(pos_per_100000 ~ not_quarantined_jobs + medincome + testing_ratio, data = ZCTA_ACS_COVID)
broom::tidy(glm_esstl_and_income)
## # A tibble: 4 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 6.34 0.0701 90.4 0.
## 2 not_quarantined_jobs 0.0185 0.00248 7.46 8.96e- 14
## 3 medincome -0.00000250 0.000000359 -6.96 3.36e- 12
## 4 testing_ratio 19.5 0.888 22.0 4.07e-107
enframe(predict(glm_esstl_and_income)) %>% mutate(value = exp(value))
## # A tibble: 177 x 2
## name value
## <chr> <dbl>
## 1 1 1293.
## 2 2 1301.
## 3 3 877.
## 4 4 870.
## 5 5 710.
## 6 6 787.
## 7 7 677.
## 8 8 1200.
## 9 9 987.
## 10 10 891.
## # … with 167 more rows
# Just using proportion uninsured and median income
glm_insurance_and_income <- glm.nb(pos_per_100000 ~ not_insured + medincome, data = ZCTA_ACS_COVID)
broom::tidy(glm_insurance_and_income)
## # A tibble: 3 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 8.14 0.120 68.0 0.
## 2 not_insured -0.00420 0.00857 -0.490 6.24e- 1
## 3 medincome -0.00000737 0.000000891 -8.26 1.41e-16
# PCA of social variables
pca_socialvars <- prcomp(ZCTA_ACS_COVID %>% dplyr::select(all_of(SES_vars)), center = T, scale. = T)
PC1 <- enframe(pca_socialvars$rotation[,1]) %>% arrange(desc(value)) %>% rename("label" = "name")
df_for_PCA_analysis <- ZCTA_ACS_COVID %>% bind_cols(., PC1 = pca_socialvars$x[,1])
# Compare BWQS ranks to Principal Components ranks
BWQS_fits %>%
dplyr::select(mean, label) %>%
mutate(rank_bwqs = rank(mean)) %>%
left_join(., PC1, by = "label") %>%
mutate(rank_pca = rank(value)) %>%
dplyr::select(2, 3, 5, 1, 4)
## # A tibble: 10 x 5
## label rank_bwqs rank_pca mean value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 not_insured 10 4 0.181 0.317
## 2 essentialworker_drove 9 2 0.167 0.201
## 3 avg_hhold_size 8 9 0.159 0.382
## 4 res_vol_popdensity 7 3 0.115 0.216
## 5 one_over_medincome 6 8 0.0976 0.363
## 6 didnot_workhome_commute 5 7 0.0884 0.359
## 7 not_quarantined_jobs 4 10 0.0617 0.397
## 8 unemployed 3 6 0.0532 0.358
## 9 essentialworker_pubtrans 2 5 0.0500 0.335
## 10 one_over_grocers_per_1000 1 1 0.0264 0.0890
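As a compact summary of the agreement between the two rankings, the BWQS weights can also be correlated with the PC1 loadings directly; a brief sketch using the objects defined above:
BWQS_fits %>%
  dplyr::select(mean, label) %>%
  left_join(., PC1, by = "label") %>%
  summarise(rank_agreement = cor(mean, value, method = "spearman"))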
glm_princomp <- glm.nb(pos_per_100000 ~ PC1 + ns(testing_ratio, df = 3), data = df_for_PCA_analysis)
summary(glm_princomp)
##
## Call:
## glm.nb(formula = pos_per_100000 ~ PC1 + ns(testing_ratio, df = 3),
## data = df_for_PCA_analysis, init.theta = 94.08962057, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5314 -0.5865 0.0748 0.5553 3.0714
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.635831 0.047047 141.05 <2e-16 ***
## PC1 0.071807 0.004999 14.36 <2e-16 ***
## ns(testing_ratio, df = 3)1 0.980427 0.042311 23.17 <2e-16 ***
## ns(testing_ratio, df = 3)2 2.032569 0.106295 19.12 <2e-16 ***
## ns(testing_ratio, df = 3)3 1.135794 0.053268 21.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(94.0896) family taken to be 1)
##
## Null deviance: 2796.19 on 176 degrees of freedom
## Residual deviance: 179.89 on 172 degrees of freedom
## AIC: 2381.1
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 94.1
## Std. Err.: 10.7
##
## 2 x log-likelihood: -2369.08
exp(confint(glm.nb(pos_per_100000 ~ PC1 + ns(testing_ratio, df = 3), data = df_for_PCA_analysis)))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 694.185891 836.816961
## PC1 1.063885 1.085095
## ns(testing_ratio, df = 3)1 2.456061 2.892820
## ns(testing_ratio, df = 3)2 6.173665 9.430225
## ns(testing_ratio, df = 3)3 2.804839 3.461578
enframe(predict(glm_princomp)) %>% mutate(glm_princomp = exp(value)) %>% dplyr::select(glm_princomp)
## # A tibble: 177 x 1
## glm_princomp
## <dbl>
## 1 1339.
## 2 1089.
## 3 727.
## 4 942.
## 5 741.
## 6 819.
## 7 891.
## 8 1065.
## 9 919.
## 10 809.
## # … with 167 more rows
enframe(predict(glm_esstl_and_income)) %>% mutate(glm_esstl_and_income = exp(value)) %>% dplyr::select(glm_esstl_and_income)
## # A tibble: 177 x 1
## glm_esstl_and_income
## <dbl>
## 1 1293.
## 2 1301.
## 3 877.
## 4 870.
## 5 710.
## 6 787.
## 7 677.
## 8 1200.
## 9 987.
## 10 891.
## # … with 167 more rows
Compare_Metrics <- bind_cols(ZCTA_ACS_COVID,
glm_princomp = enframe(predict(glm_princomp)) %>% mutate(glm_princomp = exp(value)) %>% dplyr::select(glm_princomp),
glm_esstl_and_income = enframe(predict(glm_esstl_and_income)) %>% mutate(glm_esstl_and_income = exp(value)) %>% dplyr::select(glm_esstl_and_income),
BWQS_index = colMeans(extract(m1,"y_new")$y_new))
# comparison of RMSE and Kendall's tau metrics for three different modeling approaches
# Supplemental Table 4
bind_rows(Compare_Metrics %>%
summarise_at(vars(c("BWQS_index", "glm_princomp", "glm_esstl_and_income")),
~cor(pos_per_100000, .x, method = "kendall")) %>%
mutate(parameter = "kendalls"),
Compare_Metrics %>%
summarise_at(vars(c("BWQS_index", "glm_princomp", "glm_esstl_and_income")),
~sqrt(mean(pos_per_100000-.x)^2)) %>%
mutate(parameter = "RMSE"))
## # A tibble: 2 x 4
## BWQS_index glm_princomp glm_esstl_and_income parameter
## <dbl> <dbl> <dbl> <chr>
## 1 0.869 0.850 0.823 kendalls
## 2 2.67 2.14 8.10 RMSE
#### Subway analyses ####
# Different BWQS groupings for the subway analysis: Low (< 25th percentile), Mid (25th-75th), High (> 75th percentile)
UHF_BWQS_COVID_3split_shp <- UHF_shp %>%
mutate(uhf = as.character(UHFCODE)) %>%
left_join(., UHF_BWQS, by = "uhf") %>%
mutate(Risk = factor(ifelse(uhf_weighted_bwqs < quantile(uhf_weighted_bwqs, probs = .25, na.rm = T), "Low",
if_else(uhf_weighted_bwqs > quantile(uhf_weighted_bwqs, probs = .75, na.rm = T), "High", "Mid")),
levels = c("High", "Mid", "Low")))
Subway_BWQS_3split_df <- Subway_ridership_by_UHF %>%
left_join(., st_set_geometry(UHF_BWQS_COVID_3split_shp, NULL), by = c("place" = "uhf")) %>%
filter(!is.na(place) & date >= "2020-01-29" & date <= "2020-04-30") %>%
group_by(place) %>%
arrange(date) %>%
mutate(time_index = time(date)) %>%
filter(!is.na(Risk) & date != "2020-02-17") %>% #removing Presidents' Day national holiday
anti_join(., service_changes_in_lowsubway_areas, by = c("date", "place")) #removing low outliers due to service changes in low subway density areas
fit_drm_risk_3split <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, separate = F)
anova(fit_drm_all, fit_drm_risk_3split)
##
## 1st model
## fct: W2.4()
## pmodels: 1 (for all parameters)
## 2nd model
## fct: W2.4()
## pmodels: Risk (for all parameters)
## ANOVA table
##
## ModelDf RSS Df F value p value
## 1st model 3291 21.285
## 2nd model 3283 15.418 8 156.17 0.00
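# The model with separate Weibull parameters per inequity-index group fits significantly better
# than a single shared curve (F = 156.17, p < 0.001), i.e. ridership trajectories differ by group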
summary(fit_drm_risk_3split)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:Low -12.0674637 0.4806596 -25.106 < 2.2e-16 ***
## b:Mid -11.0552063 0.3517647 -31.428 < 2.2e-16 ***
## b:High -10.8711772 0.3998416 -27.189 < 2.2e-16 ***
## c:Low 0.0728684 0.0038963 18.702 < 2.2e-16 ***
## c:Mid 0.1293654 0.0033929 38.128 < 2.2e-16 ***
## c:High 0.1751586 0.0040680 43.058 < 2.2e-16 ***
## d:Low 0.9804785 0.0035690 274.718 < 2.2e-16 ***
## d:Mid 0.9599509 0.0028982 331.228 < 2.2e-16 ***
## [ reached getOption("max.print") -- omitted 4 rows ]
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.06852921 (3283 degrees of freedom)
compParm(fit_drm_risk_3split, "b", operator = "-")
##
## Comparison of parameter 'b'
##
## Estimate Std. Error t-value p-value
## Low-Mid -1.01226 0.59563 -1.6995 0.08932 .
## Low-High -1.19629 0.62523 -1.9134 0.05579 .
## Mid-High -0.18403 0.53255 -0.3456 0.72969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
compParm(fit_drm_risk_3split, "c", operator = "-")
##
## Comparison of parameter 'c'
##
## Estimate Std. Error t-value p-value
## Low-Mid -0.0564971 0.0051665 -10.9352 < 2.2e-16 ***
## Low-High -0.1022903 0.0056329 -18.1595 < 2.2e-16 ***
## Mid-High -0.0457932 0.0052972 -8.6448 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(fit_drm_risk_3split)
fit_drm_risk_3split_high <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, subset = Risk=="High")
fit_drm_risk_3split_mid <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, subset = Risk=="Mid")
fit_drm_risk_3split_low <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_3split_df, subset = Risk=="Low")
summary(fit_drm_risk_3split_high)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -10.8693720 0.3278018 -33.158 < 2.2e-16 ***
## c:(Intercept) 0.1751430 0.0033359 52.502 < 2.2e-16 ***
## d:(Intercept) 0.9758825 0.0027014 361.256 < 2.2e-16 ***
## e:(Intercept) 46.8619040 0.1050027 446.292 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.05619323 (1008 degrees of freedom)
summary(fit_drm_risk_3split_mid)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -11.0643597 0.3961231 -27.932 < 2.2e-16 ***
## c:(Intercept) 0.1294098 0.0038142 33.928 < 2.2e-16 ***
## d:(Intercept) 0.9599361 0.0032584 294.602 < 2.2e-16 ***
## e:(Intercept) 45.9148150 0.1186594 386.946 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.07705059 (1359 degrees of freedom)
summary(fit_drm_risk_3split_low)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -12.0693043 0.4732119 -25.505 < 2.2e-16 ***
## c:(Intercept) 0.0728774 0.0038346 19.005 < 2.2e-16 ***
## d:(Intercept) 0.9804786 0.0035126 279.135 < 2.2e-16 ***
## e:(Intercept) 44.2647902 0.1135436 389.848 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.06744523 (916 degrees of freedom)
confint(fit_drm_risk_3split_high)
## 2.5 % 97.5 %
## b:(Intercept) -11.5126240 -10.2261199
## c:(Intercept) 0.1685968 0.1816891
## d:(Intercept) 0.9705815 0.9811834
## e:(Intercept) 46.6558551 47.0679529
confint(fit_drm_risk_3split_mid)
## 2.5 % 97.5 %
## b:(Intercept) -11.8414388 -10.2872806
## c:(Intercept) 0.1219274 0.1368923
## d:(Intercept) 0.9535440 0.9663282
## e:(Intercept) 45.6820397 46.1475904
confint(fit_drm_risk_3split_low)
## 2.5 % 97.5 %
## b:(Intercept) -12.99800978 -11.14059877
## c:(Intercept) 0.06535177 0.08040294
## d:(Intercept) 0.97358495 0.98737217
## e:(Intercept) 44.04195432 44.48762611
# b is not itself a slope; it is a scaling parameter that must be transformed to obtain the slope of the fitted ridership decline
slopes_3split <- as_tibble(coef(fit_drm_risk_3split_high), rownames = "vars") %>%
bind_cols(BWQS_risk = "high") %>%
bind_rows(as_tibble(coef(fit_drm_risk_3split_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
bind_rows(as_tibble(coef(fit_drm_risk_3split_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
spread(., vars, value) %>%
mutate(slope = -b*(-d/(4*e)))
as_tibble(confint(fit_drm_risk_3split_high), rownames = "vars") %>%
bind_cols(BWQS_risk = "high") %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
filter(vars == "b") %>%
right_join(., slopes_3split, by = "BWQS_risk") %>%
mutate(slope_lower_ci = -`2.5 %`*(-d/(4*e)),
slope_higher_ci = -`97.5 %`*(-d/(4*e))) %>%
dplyr::select(BWQS_risk, slope, slope_lower_ci, slope_higher_ci)
## # A tibble: 3 x 4
## BWQS_risk slope slope_lower_ci slope_higher_ci
## <chr> <dbl> <dbl> <dbl>
## 1 high -0.0566 -0.0599 -0.0532
## 2 mid -0.0578 -0.0619 -0.0538
## 3 low -0.0668 -0.0720 -0.0617
fit_drm_predictions_3split <- as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_high, interval = "confidence"), warning = handler) %>%
bind_cols(., Risk = "High")) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_mid, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Mid"))) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_low, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Low")))
Subway_BWQS_3split_df1 <- bind_cols(Subway_BWQS_3split_df %>% arrange(Risk, date),
fit_drm_predictions_3split %>% dplyr::select(-Risk))
Subway_BWQS_3split_df2 <- Subway_BWQS_3split_df1 %>%
filter(date>"2020-02-16") %>% # subsetting for visualization
mutate(Risk = factor(if_else(Risk == "High", "High (≥ 75%ile)",
if_else(Risk=="Mid", "Mid (IQR)", "Low (≤ 25%ile)")),
levels = c("High (≥ 75%ile)", "Mid (IQR)", "Low (≤ 25%ile)")))
# Supplementary Figure 7
write_csv(Subway_BWQS_3split_df2 %>% dplyr::select(date, usage.median.ratio, Risk, Lower, Upper, Prediction), file.path(source_path, "sfig7.csv"))
## Adding missing grouping variables: `place`
sfig7 <- ggplot() +
geom_jitter(data = Subway_BWQS_3split_df2, aes(x = date, y = usage.median.ratio, color = as.factor(Risk)), alpha = .5, position = position_jitter(height = 0, width = 0.4))+
geom_ribbon(data = subset(Subway_BWQS_3split_df2, Risk == "High (≥ 75%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_3split_df2, Risk == "Mid (IQR)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_3split_df2, Risk == "Low (≤ 25%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_line(data = Subway_BWQS_3split_df2, aes(x = date, y = Prediction, color = as.factor(Risk))) +
scale_x_date("Date", date_minor_breaks = "1 week") +
scale_y_continuous("Relative Subway Ridership (%)", labels = scales::percent) +
geom_vline(xintercept = date("2020-03-22"),color = "grey30", lty = 2) +
theme_bw(base_size = 16) +
labs(colour="COVID-19 inequity index") +
theme(legend.title = element_text(face = "bold", size = 12), legend.position = c(0.8, 0.7))
if(export.figs){
write_figs(sfig7, "sfig7", fig_width = 8, fig_height = 6)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/sfig7_2021-06-17.png
# ZCTA level BWQS for subway
Subway_ridership_by_ZCTA <- relative.subway.usage(2020, by = "zcta")
# There are some clearly high outliers at the ZCTA level that need to be removed:
# weekend dates in April 2020 for ZCTA 10469 in the Bronx, listed below
high_april_bronx_subway <- tibble(date = as.Date(c("2020-04-04", "2020-04-05", "2020-04-11", "2020-04-12", "2020-04-18", "2020-04-19", "2020-04-25", "2020-04-26")),
place = c("10469", "10469","10469","10469","10469","10469","10469","10469"))
Subway_BWQS_ZCTA_df <- Subway_ridership_by_ZCTA %>%
left_join(., ZCTA_BWQS_score, by = c("place" = "zcta")) %>%
filter(!is.na(place) & date >= "2020-01-29" & date <= "2020-04-30") %>%
group_by(place) %>%
arrange(date) %>%
mutate(time_index = time(date)) %>%
ungroup() %>%
filter(!is.na(BWQS_index) & date != "2020-02-17") %>% #removing Presidents' Day national holiday
anti_join(., high_april_bronx_subway, by = c("date", "place")) %>% #removing the high outlier dates for ZCTA 10469 identified above
mutate(Risk = factor(if_else(BWQS_index < quantile(BWQS_index, probs = .25), "Low",
if_else(BWQS_index > quantile(BWQS_index, probs = .75), "High", "Mid")),
levels = c("High", "Mid", "Low")))
fit_drm_risk_3split_zcta_high <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_ZCTA_df, subset = Risk=="High")
fit_drm_risk_3split_zcta_mid <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_ZCTA_df, subset = Risk=="Mid")
fit_drm_risk_3split_zcta_low <- drm(usage.median.ratio ~ time_index, curveid = Risk, fct = W2.4(), data = Subway_BWQS_ZCTA_df, subset = Risk=="Low")
summary(fit_drm_risk_3split_zcta_high)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -10.9339014 0.3239343 -33.754 < 2.2e-16 ***
## c:(Intercept) 0.1657780 0.0032882 50.416 < 2.2e-16 ***
## d:(Intercept) 0.9645988 0.0026566 363.090 < 2.2e-16 ***
## e:(Intercept) 46.8781965 0.1028828 455.647 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.09138835 (2748 degrees of freedom)
summary(fit_drm_risk_3split_zcta_mid)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -11.3144667 0.3824104 -29.587 < 2.2e-16 ***
## c:(Intercept) 0.1357604 0.0035994 37.718 < 2.2e-16 ***
## d:(Intercept) 0.9779780 0.0030449 321.182 < 2.2e-16 ***
## e:(Intercept) 45.9310266 0.1089925 421.415 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.1489131 (5700 degrees of freedom)
summary(fit_drm_risk_3split_zcta_low)
##
## Model fitted: Weibull (type 2) (4 parms)
##
## Parameter estimates:
##
## Estimate Std. Error t-value p-value
## b:(Intercept) -11.8041674 0.3454714 -34.168 < 2.2e-16 ***
## c:(Intercept) 0.0619249 0.0028874 21.447 < 2.2e-16 ***
## d:(Intercept) 0.9885724 0.0026769 369.299 < 2.2e-16 ***
## e:(Intercept) 43.9325609 0.0871976 503.827 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error:
##
## 0.08812425 (2756 degrees of freedom)
# b is not itself a slope; it is a scaling parameter that must be transformed to obtain the slope of the fitted ridership decline
slopes_zcta <- as_tibble(coef(fit_drm_risk_3split_zcta_high), rownames = "vars") %>%
bind_cols(BWQS_risk = "high") %>%
bind_rows(as_tibble(coef(fit_drm_risk_3split_zcta_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
bind_rows(as_tibble(coef(fit_drm_risk_3split_zcta_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
spread(., vars, value) %>%
mutate(slope = -b*(-d/(4*e)))
as_tibble(confint(fit_drm_risk_3split_zcta_high), rownames = "vars") %>%
bind_cols(BWQS_risk = "high") %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_zcta_mid), rownames = "vars") %>% bind_cols(BWQS_risk = "mid")) %>%
bind_rows(as_tibble(confint(fit_drm_risk_3split_zcta_low), rownames = "vars") %>% bind_cols(BWQS_risk = "low")) %>%
mutate(vars = str_remove(vars, fixed(":(Intercept)"))) %>%
filter(vars == "b") %>%
right_join(., slopes_zcta, by = "BWQS_risk") %>%
mutate(slope_lower_ci = -`2.5 %`*(-d/(4*e)),
slope_higher_ci = -`97.5 %`*(-d/(4*e))) %>%
dplyr::select(BWQS_risk, slope, slope_lower_ci, slope_higher_ci)
## # A tibble: 3 x 4
## BWQS_risk slope slope_lower_ci slope_higher_ci
## <chr> <dbl> <dbl> <dbl>
## 1 high -0.0562 -0.0595 -0.0530
## 2 mid -0.0602 -0.0642 -0.0562
## 3 low -0.0664 -0.0702 -0.0626
fit_drm_predictions_zcta <- as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_zcta_high, interval = "confidence"), warning = handler) %>%
bind_cols(., Risk = "High")) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_zcta_mid, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Mid"))) %>%
bind_rows(., as_tibble(withCallingHandlers(predict(fit_drm_risk_3split_zcta_low, interval = "confidence"),
warning = handler ) %>% bind_cols(., Risk = "Low"))) # %>% mutate(time_index = row_number())
Subway_BWQS_ZCTA_df1 <- bind_cols(Subway_BWQS_ZCTA_df %>% arrange(Risk, date),
fit_drm_predictions_zcta %>% dplyr::select(-Risk))
Subway_BWQS_ZCTA_df2 <- Subway_BWQS_ZCTA_df1 %>%
filter(date>"2020-02-16") %>% # subsetting for visualization
mutate(Risk = factor(if_else(Risk == "High", "High (≥ 75%ile)",
if_else(Risk=="Mid", "Mid (IQR)", "Low (≤ 25%ile)")),
levels = c("High (≥ 75%ile)", "Mid (IQR)", "Low (≤ 25%ile)")))
# Supplementary Figure 8
write_csv(Subway_BWQS_ZCTA_df2 %>% dplyr::select(date, usage.median.ratio, Lower, Upper, Prediction, Risk), file.path(source_path, "sfig8.csv"))
sfig8 <- ggplot() +
geom_jitter(data = Subway_BWQS_ZCTA_df2, aes(x = date, y = usage.median.ratio, color = Risk), size = 0.3, alpha = 0.3, position = position_jitter(height = 0, width = 0.42))+
geom_ribbon(data = subset(Subway_BWQS_ZCTA_df2, Risk == "High (≥ 75%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_ZCTA_df2, Risk == "Mid (IQR)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_ribbon(data = subset(Subway_BWQS_ZCTA_df2, Risk == "Low (≤ 25%ile)"), aes(x = date, ymin = Lower, ymax = Upper), fill = "grey50") +
geom_line(data = Subway_BWQS_ZCTA_df2, aes(x = date, y = Prediction, color = Risk)) +
scale_x_date("Date", date_minor_breaks = "1 week") +
scale_y_continuous("Relative Subway Ridership (%)", labels = scales::percent) +
geom_vline(xintercept = date("2020-03-22"),color = "grey30", lty = 2) +
theme_bw(base_size = 16) +
labs(colour="COVID-19 inequity index") +
theme(legend.title = element_text(face = "bold", size = 12), legend.position = c(0.8, 0.7))
if(export.figs){
write_figs(sfig8, "sfig8", fig_width = 8, fig_height = 6)
}
## Wrote /home/rushj03/dev/COVID_19_admin_disparities/figures/sfig8_2021-06-17.png
# Supplementary Figure 9: compare MTA turnstile data to Google mobility reports
source(here("code/mta_vs_google.R"), echo = FALSE)
write_csv(comp_long, file.path(source_path, "sfig9.csv"))
mobplot
#### Appendix ####
t_final = Sys.time()
## [1] "t_start: 2021-06-17 10:16:03"
## [1] "t_turnstile_1: 2021-06-17 10:16:10"
## [1] "t_turnstile_2: 2021-06-17 10:16:22"
## [1] "t_census: 2021-06-17 10:16:48"
## [1] "t_dataprep: 2021-06-17 10:19:58"
## [1] "t_m1: 2021-06-17 10:19:58"
## [1] "t_postm1: 2021-06-17 10:20:08"
## [1] "t_m2: 2021-06-17 10:20:12"
## [1] "t_m2_3q: 2021-06-17 10:20:16"
## [1] "t_final: 2021-06-17 10:21:39"
Time part 1: Script start through loading of MTA Turnstile package:
print(t_turnstile_1 - t_start)
## Time difference of 7.07228 secs
Time part 2: Downloading (if this is the first run) and processing of subway turnstile data by neighborhood:
This run used cached downloads of MTA turnstile data. The time to run this section including downloads is around 14 minutes.
print(t_turnstile_2 - t_turnstile_1)
## Time difference of 12.21892 secs
Time part 3: Downloading of all other data and processing of Census data:
This run used cached downloads of data. The time to run this section including downloads is around 5 minutes.
print(t_census - t_turnstile_2)
## Time difference of 25.54943 secs
Time part 4: All other data processing, modeling, and figure generation:
This run used cached model output for at least one of the three BWQS models. The time to run this section without any cached model output is around 35 minutes.
print(t_final - t_census)
## Time difference of 4.842822 mins
Total runtime:
print(t_final - t_start)
## Time difference of 5.590166 mins
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.2 (2020-06-22)
## os Ubuntu 18.04.5 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2021-06-17
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────
## ! package * version date lib source
## P abind * 1.4-5 2016-07-21 [?] CRAN (R 4.0.2)
## P acepack 1.4.1 2016-10-29 [?] CRAN (R 4.0.2)
## P askpass 1.1 2019-01-13 [?] CRAN (R 4.0.2)
## P assertthat 0.2.1 2019-03-21 [?] CRAN (R 4.0.2)
## P backports 1.1.8 2020-06-17 [?] CRAN (R 4.0.2)
## P base64enc 0.1-3 2015-07-28 [?] CRAN (R 4.0.2)
## P BBmisc 1.11 2017-03-10 [?] CRAN (R 4.0.2)
## P bit 4.0.4 2020-08-04 [?] CRAN (R 4.0.2)
## P bit64 4.0.5 2020-08-30 [?] CRAN (R 4.0.2)
## P bitops 1.0-6 2013-08-17 [?] CRAN (R 4.0.2)
## P blob 1.2.1 2020-01-20 [?] CRAN (R 4.0.2)
## P boot 1.3-25 2020-04-26 [?] CRAN (R 4.0.0)
## P broom * 0.5.6 2020-04-20 [?] CRAN (R 4.0.2)
## P callr 3.4.3 2020-03-28 [?] CRAN (R 4.0.2)
## P car 3.0-8 2020-05-21 [?] CRAN (R 4.0.2)
## P carData 3.0-4 2020-05-22 [?] CRAN (R 4.0.2)
## P cellranger 1.1.0 2016-07-27 [?] CRAN (R 4.0.2)
## P checkmate 2.0.0 2020-02-06 [?] CRAN (R 4.0.2)
## P class 7.3-17 2020-04-26 [?] CRAN (R 4.0.0)
## P classInt 0.4-3 2020-04-07 [?] CRAN (R 4.0.2)
## P cli 2.0.2 2020-02-28 [?] CRAN (R 4.0.2)
## P cluster 2.1.0 2019-06-19 [?] CRAN (R 4.0.0)
## P coda 0.19-3 2019-07-05 [?] CRAN (R 4.0.2)
## P codetools 0.2-16 2018-12-24 [?] CRAN (R 4.0.0)
## P colorspace 1.4-1 2019-03-18 [?] CRAN (R 4.0.2)
## P corrplot * 0.84 2017-10-16 [?] CRAN (R 4.0.2)
## P crayon 1.3.4 2017-09-16 [?] CRAN (R 4.0.2)
## P curl 4.3 2019-12-02 [?] CRAN (R 4.0.2)
## P data.table * 1.13.0 2020-07-24 [?] CRAN (R 4.0.2)
## P DBI 1.1.0 2019-12-15 [?] CRAN (R 4.0.2)
## P dbplyr 1.4.4 2020-05-27 [?] CRAN (R 4.0.2)
## P deldir 0.1-25 2020-02-03 [?] CRAN (R 4.0.2)
## P DHARMa * 0.3.3.0 2020-09-08 [?] CRAN (R 4.0.2)
## P digest 0.6.25 2020-02-23 [?] CRAN (R 4.0.2)
## P dplyr * 1.0.0 2020-05-29 [?] CRAN (R 4.0.2)
## P drc * 3.0-1 2016-08-30 [?] CRAN (R 4.0.2)
## P e1071 1.7-3 2019-11-26 [?] CRAN (R 4.0.2)
## P egg * 0.4.5 2019-07-13 [?] CRAN (R 4.0.2)
## P ellipsis 0.3.1 2020-05-15 [?] CRAN (R 4.0.2)
## P evaluate 0.14 2019-05-28 [?] CRAN (R 4.0.2)
## P expm 0.999-4 2019-03-21 [?] CRAN (R 4.0.2)
## P fansi 0.4.1 2020-01-08 [?] CRAN (R 4.0.2)
## P farver 2.0.3 2020-01-16 [?] CRAN (R 4.0.2)
## P fastmap 1.0.1 2019-10-08 [?] CRAN (R 4.0.2)
## P fastmatch 1.1-0 2017-01-28 [?] CRAN (R 4.0.2)
## P forcats * 0.5.0 2020-03-01 [?] CRAN (R 4.0.2)
## P foreach 1.5.0 2020-03-30 [?] CRAN (R 4.0.2)
## P foreign 0.8-79 2020-04-26 [?] CRAN (R 4.0.0)
## P Formula 1.2-3 2018-05-03 [?] CRAN (R 4.0.2)
## P fs 1.4.2 2020-06-30 [?] CRAN (R 4.0.2)
## P fst * 0.9.2 2020-04-01 [?] CRAN (R 4.0.2)
## P gap 1.2.2 2020-02-02 [?] CRAN (R 4.0.2)
## P gdata 2.18.0 2017-06-06 [?] CRAN (R 4.0.2)
## P generics 0.0.2 2018-11-29 [?] CRAN (R 4.0.2)
## P ggExtra * 0.9 2019-08-27 [?] CRAN (R 4.0.2)
## P ggmap 3.0.0 2019-02-05 [?] CRAN (R 4.0.2)
## P ggplot2 * 3.3.2 2020-06-19 [?] CRAN (R 4.0.2)
## P ggpubr * 0.4.0 2020-06-27 [?] CRAN (R 4.0.2)
## P ggridges * 0.5.2 2020-01-12 [?] CRAN (R 4.0.2)
## P ggsignif 0.6.0 2019-08-08 [?] CRAN (R 4.0.2)
## P ggsn * 0.5.0 2019-02-18 [?] CRAN (R 4.0.2)
## P glue 1.4.2 2020-08-27 [?] CRAN (R 4.0.2)
## P gmodels 2.18.1 2018-06-25 [?] CRAN (R 4.0.2)
## P gridExtra * 2.3 2017-09-09 [?] CRAN (R 4.0.2)
## P gtable 0.3.0 2019-03-25 [?] CRAN (R 4.0.2)
## P gtools 3.8.2 2020-03-31 [?] CRAN (R 4.0.2)
## P haven 2.3.1 2020-06-01 [?] CRAN (R 4.0.2)
## P here * 0.1 2017-05-28 [?] CRAN (R 4.0.2)
## P highr 0.8 2019-03-20 [?] CRAN (R 4.0.2)
## P Hmisc 4.4-0 2020-03-23 [?] CRAN (R 4.0.2)
## P hms 0.5.3 2020-01-08 [?] CRAN (R 4.0.2)
## P htmlTable 2.0.0 2020-06-21 [?] CRAN (R 4.0.2)
## P htmltools 0.5.0 2020-06-16 [?] CRAN (R 4.0.2)
## P htmlwidgets 1.5.1 2019-10-08 [?] CRAN (R 4.0.2)
## P httpuv 1.5.4 2020-06-06 [?] CRAN (R 4.0.2)
## P httr * 1.4.1 2019-08-05 [?] CRAN (R 4.0.2)
## P inline 0.3.15 2018-05-18 [?] CRAN (R 4.0.2)
## P iterators 1.0.12 2019-07-26 [?] CRAN (R 4.0.2)
## P jpeg 0.1-8.1 2019-10-24 [?] CRAN (R 4.0.2)
## P jsonlite * 1.7.0 2020-06-25 [?] CRAN (R 4.0.2)
## P Just.universal * 0.0.0.9000 2020-11-24 [?] github (justlab/Just_universal@45762c3)
## P kableExtra * 1.3.1 2020-10-22 [?] CRAN (R 4.0.2)
## P KernSmooth 2.23-17 2020-04-26 [?] CRAN (R 4.0.0)
## P knitr 1.29 2020-06-23 [?] CRAN (R 4.0.2)
## P labeling 0.3 2014-08-23 [?] CRAN (R 4.0.2)
## P later 1.1.0.1 2020-06-05 [?] CRAN (R 4.0.2)
## P lattice 0.20-41 2020-04-02 [?] CRAN (R 4.0.0)
## P latticeExtra 0.6-29 2019-12-19 [?] CRAN (R 4.0.2)
## P LearnBayes 2.15.1 2018-03-18 [?] CRAN (R 4.0.2)
## P lifecycle 0.2.0 2020-03-06 [?] CRAN (R 4.0.2)
## P lme4 1.1-23 2020-04-07 [?] CRAN (R 4.0.2)
## P loo 2.2.0 2019-12-19 [?] CRAN (R 4.0.2)
## P lubridate * 1.7.9 2020-06-08 [?] CRAN (R 4.0.2)
## P lwgeom * 0.2-5 2020-06-12 [?] CRAN (R 4.0.2)
## P magic * 1.5-9 2018-09-17 [?] CRAN (R 4.0.2)
## P magrittr 1.5 2014-11-22 [?] CRAN (R 4.0.2)
## P maptools 1.0-1 2020-05-14 [?] CRAN (R 4.0.2)
## P MASS * 7.3-51.6 2020-04-26 [?] CRAN (R 4.0.0)
## P Matrix * 1.2-18 2019-11-27 [?] CRAN (R 4.0.0)
## P matrixStats * 0.56.0 2020-03-13 [?] CRAN (R 4.0.2)
## P memoise 1.1.0 2017-04-21 [?] CRAN (R 4.0.2)
## P mgcv 1.8-31 2019-11-09 [?] CRAN (R 4.0.0)
## P mime 0.9 2020-02-04 [?] CRAN (R 4.0.2)
## P miniUI 0.1.1.1 2018-05-18 [?] CRAN (R 4.0.2)
## P minqa 1.2.4 2014-10-09 [?] CRAN (R 4.0.2)
## P modelr 0.1.8 2020-05-19 [?] CRAN (R 4.0.2)
## P MTA.turnstile * 0.0.0.9001 2020-11-24 [?] Github (justlab/MTA_turnstile@544eba3)
## P multcomp 1.4-13 2020-04-08 [?] CRAN (R 4.0.2)
## P munsell 0.5.0 2018-06-12 [?] CRAN (R 4.0.2)
## P mvtnorm 1.1-1 2020-06-09 [?] CRAN (R 4.0.2)
## P nlme 3.1-147 2020-04-13 [?] CRAN (R 4.0.0)
## P nloptr 1.2.2.1 2020-03-11 [?] CRAN (R 4.0.2)
## P nnet 7.3-14 2020-04-26 [?] CRAN (R 4.0.0)
## P openxlsx 4.1.5 2020-05-06 [?] CRAN (R 4.0.2)
## P ParamHelpers 1.14 2020-03-24 [?] CRAN (R 4.0.2)
## P pdftools * 2.3.1 2020-05-22 [?] CRAN (R 4.0.2)
## P pillar 1.4.4 2020-05-05 [?] CRAN (R 4.0.2)
## P pkgbuild 1.0.8 2020-05-07 [?] CRAN (R 4.0.2)
## P pkgconfig 2.0.3 2019-09-22 [?] CRAN (R 4.0.2)
## P plotrix 3.7-8 2020-04-16 [?] CRAN (R 4.0.2)
## P plyr 1.8.6 2020-03-03 [?] CRAN (R 4.0.2)
## P png 0.1-7 2013-12-03 [?] CRAN (R 4.0.2)
## P prettyunits 1.1.1 2020-01-24 [?] CRAN (R 4.0.2)
## P processx 3.4.2 2020-02-09 [?] CRAN (R 4.0.2)
## P promises 1.1.1 2020-06-09 [?] CRAN (R 4.0.2)
## P ps 1.3.3 2020-05-08 [?] CRAN (R 4.0.2)
## P purrr * 0.3.4 2020-04-17 [?] CRAN (R 4.0.2)
## P qpdf 1.1 2019-03-07 [?] CRAN (R 4.0.2)
## P qs * 0.22.1 2020-06-10 [?] CRAN (R 4.0.2)
## P R6 2.4.1 2019-11-12 [?] CRAN (R 4.0.2)
## P ragg * 0.4.0 2020-10-05 [?] CRAN (R 4.0.2)
## P RApiSerialize 0.1.0 2014-04-19 [?] CRAN (R 4.0.2)
## P rappdirs 0.3.1 2016-03-28 [?] CRAN (R 4.0.2)
## P raster 3.3-7 2020-06-27 [?] CRAN (R 4.0.2)
## P RColorBrewer 1.1-2 2014-12-07 [?] CRAN (R 4.0.2)
## P Rcpp 1.0.4.6 2020-04-09 [?] CRAN (R 4.0.2)
## P RcppParallel 5.0.2 2020-06-24 [?] CRAN (R 4.0.2)
## P readr * 1.3.1 2018-12-21 [?] CRAN (R 4.0.2)
## P readxl * 1.3.1 2019-03-13 [?] CRAN (R 4.0.2)
## renv 0.12.2 2020-11-04 [1] CRAN (R 4.0.2)
## P reprex 0.3.0 2019-05-16 [?] CRAN (R 4.0.2)
## P rgdal 1.5-12 2020-06-26 [?] CRAN (R 4.0.2)
## P RgoogleMaps 1.4.5.3 2020-02-12 [?] CRAN (R 4.0.2)
## P rio 0.5.16 2018-11-26 [?] CRAN (R 4.0.2)
## P rjson 0.2.20 2018-06-08 [?] CRAN (R 4.0.2)
## P rlang 0.4.6 2020-05-02 [?] CRAN (R 4.0.2)
## P rmarkdown 2.5 2020-10-21 [?] CRAN (R 4.0.2)
## P rpart 4.1-15 2019-04-12 [?] CRAN (R 4.0.0)
## P rprojroot 1.3-2 2018-01-03 [?] CRAN (R 4.0.2)
## P RSQLite * 2.2.1 2020-09-30 [?] CRAN (R 4.0.2)
## P rstan * 2.19.3 2020-02-11 [?] CRAN (R 4.0.2)
## P rstatix 0.6.0 2020-06-18 [?] CRAN (R 4.0.2)
## P rstudioapi 0.11 2020-02-07 [?] CRAN (R 4.0.2)
## P rvest 0.3.5 2019-11-08 [?] CRAN (R 4.0.2)
## P sandwich 2.5-1 2019-04-06 [?] CRAN (R 4.0.2)
## P scales * 1.1.1 2020-05-11 [?] CRAN (R 4.0.2)
## P sessioninfo 1.1.1 2018-11-05 [?] CRAN (R 4.0.2)
## P sf * 0.9-6 2020-09-13 [?] CRAN (R 4.0.2)
## P shiny 1.5.0 2020-06-23 [?] CRAN (R 4.0.2)
## P sp * 1.4-2 2020-05-20 [?] CRAN (R 4.0.2)
## P spatialreg * 1.1-5 2019-12-01 [?] CRAN (R 4.0.2)
## P spData * 0.3.5 2020-04-06 [?] CRAN (R 4.0.2)
## P spdep * 1.1-5 2020-06-29 [?] CRAN (R 4.0.2)
## P StanHeaders * 2.21.0-6 2020-08-16 [?] CRAN (R 4.0.2)
## P statmod 1.4.34 2020-02-17 [?] CRAN (R 4.0.2)
## P stringfish 0.12.1 2020-06-05 [?] CRAN (R 4.0.2)
## P stringi 1.5.3 2020-09-09 [?] CRAN (R 4.0.2)
## P stringr * 1.4.0 2019-02-10 [?] CRAN (R 4.0.2)
## P survival 3.1-12 2020-04-10 [?] CRAN (R 4.0.0)
## P systemfonts 0.3.2 2020-09-29 [?] CRAN (R 4.0.2)
## P textshaping 0.1.2 2020-10-08 [?] CRAN (R 4.0.2)
## P TH.data 1.0-10 2019-01-21 [?] CRAN (R 4.0.2)
## P tibble * 3.0.1 2020-04-20 [?] CRAN (R 4.0.2)
## P tidycensus * 0.10.2 2020-09-28 [?] CRAN (R 4.0.2)
## P tidyr * 1.1.0 2020-05-20 [?] CRAN (R 4.0.2)
## P tidyselect 1.1.0 2020-05-11 [?] CRAN (R 4.0.2)
## P tidyverse * 1.3.0 2019-11-21 [?] CRAN (R 4.0.2)
## P tigris 0.9.4 2020-04-01 [?] CRAN (R 4.0.2)
## P units 0.6-7 2020-06-13 [?] CRAN (R 4.0.2)
## P utf8 1.1.4 2018-05-24 [?] CRAN (R 4.0.2)
## P uuid 0.1-4 2020-02-26 [?] CRAN (R 4.0.2)
## P vctrs 0.3.1 2020-06-05 [?] CRAN (R 4.0.2)
## P viridisLite 0.3.0 2018-02-01 [?] CRAN (R 4.0.2)
## P webshot 0.5.2 2019-11-22 [?] CRAN (R 4.0.2)
## P withr 2.2.0 2020-04-20 [?] CRAN (R 4.0.2)
## P xfun 0.15 2020-06-21 [?] CRAN (R 4.0.2)
## P xgboost 1.1.1.1 2020-06-14 [?] CRAN (R 4.0.2)
## P xml2 1.3.2 2020-04-23 [?] CRAN (R 4.0.2)
## P xtable 1.8-4 2019-04-21 [?] CRAN (R 4.0.2)
## P yaml 2.2.1 2020-02-01 [?] CRAN (R 4.0.2)
## P zip * 2.0.4 2019-09-01 [?] CRAN (R 4.0.2)
## P zoo 1.8-8 2020-05-02 [?] CRAN (R 4.0.2)
##
## [1] /home/rushj03/dev/COVID_19_admin_disparities/renv/library/R-4.0/x86_64-pc-linux-gnu
## [2] /data/scratch/rtemp/RtmpeGn2vn/renv-system-library
##
## P ── Loaded and on-disk path mismatch.
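
The listing above appears to be `sessioninfo`-style session output: the "P" flag marks packages loaded from the project's renv library (path [1] above) rather than from their on-disk install location, which is what the final legend line refers to. As a minimal sketch of how to reproduce this environment and print a comparable record, assuming the repository includes an renv lockfile (suggested by the renv library paths above, but not shown in this output):

# Sketch only: restore the pinned package versions and print session info.
# Assumes an renv.lock file exists at the project root; adjust if the
# lockfile lives elsewhere.
# install.packages("renv")          # if renv is not already available
renv::restore()                     # reinstall package versions recorded in renv.lock
sessioninfo::session_info()         # print platform and package details like the listing above

Restoring from the lockfile before re-running the analysis helps keep model results consistent with the published versions, since newer package releases can change defaults or numerical behavior.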