## ----------------------------------------------------------------------------- #| label: setup library(dplyr) library(httk) library(httr2) library(purrr) library(readr) library(readxl) library(sf) library(stringr) library(tibble) library(tidyr) geo_tox_data <- list() ## ----------------------------------------------------------------------------- #| label: exposure filename <- "Region4b_2020ATS_Exposure_Concentrations.xlsx" tmp <- tempfile(filename) download.file( paste0( "https://gaftp.epa.gov/rtrmodeling_public/AirToxScreen/2020/", "Exposure%20Concentrations/", filename ), tmp ) exposure <- read_xlsx(tmp) # Normalization function min_max_norm = function(x) { min_x <- min(x, na.rm = TRUE) max_x <- max(x, na.rm = TRUE) if (min_x == max_x) { rep(0, length(x)) } else { (x - min_x) / (max_x - min_x) } } geo_tox_data$exposure <- exposure |> # North Carolina counties filter(State == "NC", !grepl("0$", FIPS)) |> # Aggregate chemicals by county summarize( across( -c(State:Population), # Note: use "-c(State:Tract)" for 2019 data list(mean = mean, sd = sd), .names = "{col}|||{fn}" ), .by = FIPS ) |> pivot_longer(-FIPS) |> separate_wider_delim(name, "|||", names = c("chemical", "stat")) |> pivot_wider(names_from = stat) |> # Normalize concentrations mutate(norm = min_max_norm(mean), .by = chemical) ## ----------------------------------------------------------------------------- #| label: comptox # Copy/paste the chemical names into the batch search field # https://comptox.epa.gov/dashboard/batch-search cat(geo_tox_data$exposure |> distinct(chemical) |> pull(), sep = "\n") # Export results with "CAS-RN" identifiers as a csv file, then process in R # Remove rows without clear chemical matches, investigate manually if desired exposure_casrn <- read_csv("CCD-Batch-Search.csv") |> filter(DTXSID != "N/A", !grepl("WARNING", FOUND_BY)) # Update exposure data with CompTox Dashboard data geo_tox_data$exposure <- geo_tox_data$exposure |> inner_join(exposure_casrn, by = join_by(chemical == INPUT)) |> select(FIPS, casn = CASRN, chnm = PREFERRED_NAME, mean, sd, norm) ## ----------------------------------------------------------------------------- #| label: available-assays # Get all supported assays help_text <- request("https://ice.ntp.niehs.nih.gov/api/v1/search/help") |> req_perform() |> resp_body_string() supported_assays <- str_split_i(help_text, "Supported assay\\(s\\):", 2) |> str_split_1("\",\"") |> str_replace_all(c("\"" = "")) |> str_trim() # Search for assays available for given chemids chemids <- unique(geo_tox_data$exposure$casn) resp <- request("https://ice.ntp.niehs.nih.gov/api/v1/search") |> req_body_json(list(chemids = chemids, assays = supported_assays)) |> req_perform() result <- resp |> resp_body_json() |> pluck("endPoints") fields <- c("assay", "casrn", "dtxsid", "substanceName", "endpoint", "value") df <- map(fields, \(x) map_chr(result, x)) |> set_names(fields) |> bind_cols() available_assays <- df |> distinct(assay) |> pull() |> sort() ## ----------------------------------------------------------------------------- #| label: cHTS-hits get_cHTS_hits <- function(assays = NULL, chemids = NULL) { if (is.null(assays) & is.null(chemids)) { stop("Must provide at least one of 'assays' or 'chemids'") } # Format query parameters req_params <- list() if (!is.null(assays)) { if (!is.list(assays)) assays <- as.list(assays) req_params$assays <- assays } if (!is.null(chemids)) { if (!is.list(chemids)) chemids <- as.list(chemids) req_params$chemids <- chemids } # Query ICE API resp <- request("https://ice.ntp.niehs.nih.gov/api/v1/search") |> req_body_json(req_params) |> req_perform() if (resp$status_code != 200) { stop("Failed to retrieve data from ICE API") } # Return active chemicals result <- resp |> resp_body_json() |> pluck("endPoints") fields <- c("assay", "casrn", "dtxsid", "substanceName", "endpoint", "value") map(fields, \(x) map_chr(result, x)) |> set_names(fields) |> bind_cols() |> filter(endpoint == "Call", value == "Active") |> select(-c(endpoint, value)) |> distinct() } assays <- c( "APR_HepG2_p53Act_1hr", "APR_HepG2_p53Act_24hr", "APR_HepG2_p53Act_72hr", "ATG_p53_CIS", "TOX21_DT40_LUC", "TOX21_DT40_100_LUC", "TOX21_DT40_657_LUC", "TOX21_ELG1_LUC_Agonist", "TOX21_H2AX_HTRF_CHO_Agonist_ratio", "TOX21_p53_BLA_p1_ratio", "TOX21_p53_BLA_p2_ratio", "TOX21_p53_BLA_p3_ratio", "TOX21_p53_BLA_p4_ratio", "TOX21_p53_BLA_p5_ratio" ) chemids <- unique(geo_tox_data$exposure$casn) cHTS_hits <- get_cHTS_hits(assays = assays, chemids = chemids) ## ----------------------------------------------------------------------------- #| label: dose-response get_ICE_dose_resp <- function(assays = NULL, chemids = NULL) { if (is.null(assays) & is.null(chemids)) { stop("Must provide at least one of 'assays' or 'chemids'") } # Format query parameters req_params <- list() if (!is.null(assays)) { if (!is.list(assays)) assays <- as.list(assays) req_params$assays <- assays } if (!is.null(chemids)) { if (!is.list(chemids)) chemids <- as.list(chemids) req_params$chemids <- chemids } # Query ICE API resp <- request("https://ice.ntp.niehs.nih.gov/api/v1/curves") |> req_body_json(req_params) |> req_perform() if (resp$status_code != 200) { stop("Failed to retrieve data from ICE API") } # Return dose-response data result <- resp |> resp_body_json() |> pluck("curves") map(result, function(x) { tibble( endp = x[["endpoint"]], casn = x[["casrn"]], chnm = x[["substance"]], call = x[["call"]], logc = map_dbl(x[["concentrationResponses"]], "concentration") |> log10(), resp = map_dbl(x[["concentrationResponses"]], "response") ) }) |> bind_rows() } assays <- unique(cHTS_hits$assay) chemids <- intersect(cHTS_hits$casrn, geo_tox_data$exposure$casn) dose_response <- get_ICE_dose_resp(assays = assays, chemids = chemids) # Only keep active calls for assay/chemical combinations geo_tox_data$dose_response <- dose_response |> filter(call == "Active") |> select(-call) # Update dose-response chemical names using CompTox Dashboard data geo_tox_data$dose_response <- geo_tox_data$dose_response |> inner_join(exposure_casrn, by = join_by(casn == CASRN)) |> select(endp, casn, chnm = PREFERRED_NAME, logc, resp) ## ----------------------------------------------------------------------------- #| label: age # Data for North Carolina url <- paste0( "https://www2.census.gov/programs-surveys/popest/datasets/", "2020-2024/counties/asrh/cc-est2024-alldata-37.csv" ) age <- read_csv(url) geo_tox_data$age <- age |> # 7/1/2020 population estimate filter(YEAR == 2) |> # Create FIPS mutate(FIPS = str_c(STATE, COUNTY)) |> # Keep selected columns select(FIPS, AGEGRP, TOT_POP) ## ----------------------------------------------------------------------------- #| label: obesity-status places <- read_csv( "PLACES__County_Data_(GIS_Friendly_Format),_2020_release.csv" ) # Convert confidence interval to standard deviation extract_SD <- function(x) { range <- as.numeric(str_split_1(str_sub(x, 2, -2), ",")) diff(range) / 3.92 } geo_tox_data$obesity <- places |> # North Carolina Counties filter(StateAbbr == "NC") |> # Select obesity data select(FIPS = CountyFIPS, OBESITY_CrudePrev, OBESITY_Crude95CI) |> # Change confidence interval to standard deviation rowwise() |> mutate(OBESITY_SD = extract_SD(OBESITY_Crude95CI)) |> ungroup() |> select(-OBESITY_Crude95CI) ## ----------------------------------------------------------------------------- #| label: C_ss set.seed(2345) n_samples <- 500 # Get CASN for which httk simulation is possible. Try using load_dawson2021, # load_sipes2017, or load_pradeep2020 to increase availability. load_sipes2017() casn <- intersect( unique(geo_tox_data$dose_response$casn), get_cheminfo(suppress.messages = TRUE) ) # Define population demographics for httk simulation pop_demo <- cross_join( tibble(age_group = list( c(0, 2), c(3, 5), c(6, 10), c(11, 15), c(16, 20), c(21, 30), c(31, 40), c(41, 50), c(51, 60), c(61, 70), c(71, 100) )), tibble(weight = c("Normal", "Obese")) ) # Create wrapper function around httk steps simulate_css <- function( chem.cas, agelim_years, weight_category, samples, verbose = TRUE ) { if (verbose) { cat( chem.cas, paste0("(", paste(agelim_years, collapse = ", "), ")"), weight_category, "\n" ) } httkpop <- list( method = "vi", gendernum = NULL, agelim_years = agelim_years, agelim_months = NULL, weight_category = weight_category, reths = c( "Mexican American", "Other Hispanic", "Non-Hispanic White", "Non-Hispanic Black", "Other" ) ) css <- try( suppressWarnings({ mcs <- create_mc_samples( chem.cas = chem.cas, samples = samples, httkpop.generate.arg.list = httkpop, suppress.messages = TRUE ) calc_analytic_css( chem.cas = chem.cas, parameters = mcs, model = "3compartmentss", suppress.messages = TRUE ) }), silent = TRUE ) # Return if (is(css, "try-error")) { warning("simulate_css() failed to generate data for CASN ", chem.cas) list(NA) } else { list(css) } } # Simulate C_ss values simulated_css <- map(casn, function(chem.cas) { pop_demo |> rowwise() |> mutate( css = simulate_css(.env$chem.cas, age_group, weight, .env$n_samples) ) |> ungroup() }) simulated_css <- setNames(simulated_css, casn) # Remove CASN that failed simulate_css casn_keep <- map_lgl(simulated_css, function(df) { !(length(df$css[[1]]) == 1 && is.na(df$css[[1]])) }) simulated_css <- simulated_css[casn_keep] geo_tox_data$simulated_css <- simulated_css |> map(\(x) { x |> mutate( age_lb = map_int(age_group, first), age_ub = map_int(age_group, last) ) |> select(age_lb, age_ub, weight, css) |> unnest(css) }) |> enframe(name = "casn") |> unnest(value) ## ----------------------------------------------------------------------------- #| label: retain casn_keep <- unique(geo_tox_data$simulated_css$casn) geo_tox_data$exposure <- geo_tox_data$exposure |> filter(casn %in% casn_keep) geo_tox_data$dose_response <- geo_tox_data$dose_response |> filter(casn %in% casn_keep) ## ----------------------------------------------------------------------------- #| label: boundaries county <- st_read("cb_2020_us_county_5m/cb_2020_us_county_5m.shp") state <- st_read("cb_2020_us_state_5m/cb_2020_us_state_5m.shp") geo_tox_data$boundaries <- list( county = county |> filter(STATEFP == 37) |> select(FIPS = GEOID, geometry), state = state |> filter(STATEFP == 37) |> select(geometry) )