## ----init, include = FALSE---------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE ) ## ----setup-------------------------------------------------------------------- # library(tidyverse, quietly = TRUE) # library(cowplot) # library(RColorBrewer) # library(ctxR) # library(flextable) # library(officer) # library(patchwork) # library(mirai) # # devtools::load_all() # load invivopkfit # # # get today's date # today <- format(Sys.Date(), "%d%B%Y") ## ----eval = FALSE------------------------------------------------------------- # Sys.setenv( # OD_DIR = "path/to/inputs/", # FIG_DIR = "path/to/outputs/" # ) ## ----eval = FALSE------------------------------------------------------------- # ctxR::register_ctx_api_key(key = "") ## ----------------------------------------------------------------------------- # read_date_pk <- "25November2024" ## ----------------------------------------------------------------------------- # read_date_results <- "27November2024" ## ----pk_obj_fromSQL, eval=FALSE----------------------------------------------- # ### Minimal PK Object ###---- # # Minimum pk object, add options later # # Note that these mappings are now a default mappings, just being verbose here. # # If there are any standard deviations that are zero or NA, but N_Subjects > 1, # # Set the n_subjects to 6 or the current value if lower than 6. # # (In Showa the mode is 6, but this condition is present in CVT_Legacy as well) # minimal_pk <- pk( # data = cvt %>% # mutate(n_subjects_normalized = if_else( # n_subjects_normalized > 1 & is.na(conc_sd), # min(n_subjects_normalized, 6), n_subjects_normalized # )), # mapping = ggplot2::aes( # Chemical = analyzed_chem_dtxsid, # Chemical_Name = analyzed_chem_name_original, # DTXSID = analyzed_chem_dtxsid, # CASRN = analyzed_chem_casrn, # Species = species, # Reference = fk_extraction_document_id, # Media = conc_medium_normalized, # Route = administration_route_normalized, # Dose = dose_level_normalized, # Dose.Units = "mg/kg", # Subject_ID = fk_subject_id, # Series_ID = fk_series_id, # Study_ID = fk_study_id, # ConcTime_ID = conc_time_id, # N_Subjects = n_subjects_normalized, # Weight = weight_kg, # Weight.Units = "kg", # Time = time_hr, # Time.Units = "hours", # Value = conc, # Value.Units = "mg/L", # Value_SD = conc_sd, # LOQ = loq # ) # ) ## ----outlining_all_options, eval=FALSE---------------------------------------- # # Types of Choices: # ## Dose Normalized # ## Log10 transform # ## Scale time # # List of options # fitopts <- expand.grid( # error_model = c("pooled", "hierarchical"), # dose_norm = c(TRUE, FALSE), # log10_trans = c(TRUE, FALSE), # time_scale = c(TRUE, FALSE), # stringsAsFactors = FALSE # ) ## ----fitting_choices, eval=FALSE---------------------------------------------- # fit_data <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale, # file_dir = Sys.getenv("OD_DIR")) { # retval <- -9 # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # # cat("Fitting: ", file_str, "\n") # # tryCatch( # expr = { # this_pk <- minimal_pk + # facet_data(Chemical, Species) + # settings_preprocess(keep_data_original = FALSE, suppress.messages = TRUE) + # scale_conc(dose_norm = this_dose_norm, log10_trans = this_log10_trans) + # settings_optimx(method = c( # "L-BFGS-B", # "bobyqa" # )) # # if (this_error_model %in% "pooled") { # this_pk <- this_pk + stat_error_model(Chemical, Species) # } else if (this_error_model %in% c("hierarchical", "joint")) { # this_pk <- this_pk + stat_error_model(Chemical, Species, Reference) # } else { # stop("this_error_model must be either 'pooled' or 'hierarchical'/'joint'") # } # # if (this_time_scale %in% TRUE) { # this_pk <- this_pk + scale_time(new_units = "auto") # } # # # do the fit # this_time <- system.time(this_fit <- do_fit(this_pk, async = TRUE)) # # # # # save the result # saveRDS(this_fit, file = paste0(file_dir, file_str)) # # cli::cli_alert_success(text = "Fitting success!") # print(this_time) # # retval <- this_time[[3]] # }, # error = function(e) { # cli::cli_alert_danger("Analysis {file_str}, failed with error: {e}") # retval <- -1 # } # ) # # return(retval) # } ## ----running_all_options, eval=FALSE------------------------------------------ # daemons(8L) # everywhere(devtools::load_all()) # system.time(tmp_out <- mapply( # fit_data, # this_error_model = fitopts$error_model, # this_dose_norm = fitopts$dose_norm, # this_log10_trans = fitopts$log10_trans, # this_time_scale = fitopts$time_scale # )) # daemons(0L) # # mean(tmp_out) / 60 # Average runtime (in minutes) per fitting option # median(tmp_out) / 60 # sum(tmp_out) # Looks like there's a few extra seconds of overhead # split(tmp_out, f = factor(names(tmp_out))) |> lapply(mean) ## ----------------------------------------------------------------------------- # my_pk <- readRDS(paste0( # Sys.getenv("OD_DIR"), # # read_date_pk, # "mypk_fit_110p.Rds" # )) ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Reference) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Species) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species, Reference) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species, Reference, Route, Media) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # distinct(Chemical, Species, Reference, Route, Media, Dose) %>% # count() ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # group_by(Chemical, Species) %>% # summarise(blood_and_plasma = all(c( # "blood", # "plasma" # ) %in% Media)) %>% # ungroup() %>% # summarise(n_both = sum(blood_and_plasma)) ## ----------------------------------------------------------------------------- # get_data(my_pk) %>% # group_by(Chemical, Species) %>% # summarise( # t_mid = mean(range(Time)), # t_mid_log10 = midpt_log10(Time), # new_time_units = auto_units( # y = Time, # from = "hours", # target = 10 # ) # ) %>% # ungroup() %>% # count(new_time_units) ## ----------------------------------------------------------------------------- # get_data_info(my_pk)$dose_norm_check %>% # dplyr::summarise( # AUC_flag = sum(!is.na(data_flag_AUC)), # Cmax_flag = sum(!is.na(data_flag_Cmax)), # total_multi_dose = sum(n_dose_groups > 1), # total_expts = n() # ) ## ----------------------------------------------------------------------------- # # compute duration and concentration range across experiments (Chemical, Species, Reference, Route, Media, Dose) # data_range <- my_pk$data %>% # filter(Detect, !exclude) %>% # group_by(Chemical, Species, Reference, Media, Route, Dose, Dose.Units) %>% # summarize( # maxT = max(Time) / 24, # concRange = log10(max(Conc) / min(Conc)) # ) %>% # ungroup() # # # Panel A: histogram of day of final timepoint per experiment # sf_2A <- data_range %>% # ggplot(aes(x = maxT)) + # geom_histogram( # fill = "grey5", # bins = 40, # color = "grey5" # ) + # scale_x_log10( # labels = scales::number, # breaks = c(0.1, 1, 7, 30, 365) # ) + # annotation_logticks(sides = "b") + # labs( # x = "Day of final Timepoint", # y = "Count" # ) + # theme( # aspect.ratio = 1, # text = element_text(size = 10), # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank() # ) # # # panel B: histogram of fold concentration range # sf_2B <- data_range %>% # ggplot(aes(x = concRange)) + # geom_histogram( # fill = "grey5", # bins = 40, # color = "grey5" # ) + # scale_y_continuous( # expand = c(0, 0), # limits = c(0, 50), # breaks = c(0, 25, 50) # ) + # labs( # x = "log10(Concentration Range)", # y = "Count" # ) + # annotation_logticks(sides = "b") + # theme( # aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # axis.ticks = element_blank(), # axis.line = element_blank(), # axis.text = element_text(size = 10), # axis.title = element_text(size = 11) # ) # # sf_2AB <- plot_grid(sf_2A, sf_2B, # labels = c("A", "B"), # nrow = 1 # ) # # sf_2AB # # # save PDF version # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig2_ConcTimeRanges", # ".pdf" # ), # height = 3.2, # width = 6, # bg = "white", # units = "in" # ) # # # save PNG version # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig2_ConcTimeRanges", # ".png" # ), # height = 3.2, # width = 6, # bg = "white", # units = "in", # dpi = 300 # ) # # rm(sf_2A, sf_2B, sf_2AB) ## ----------------------------------------------------------------------------- # data_cvt <- get_data(my_pk) # data_counts <- data_cvt %>% # dplyr::filter(Detect) %>% # dplyr::count(Chemical, Species, Reference, Route, Media, Dose, Time, N_Subjects, # name = "Count" # ) # data_counts <- dplyr::left_join(data_counts, data_cvt, # by = c( # "Chemical", "Species", "Reference", # "Route", "Media", "Dose", "Time", # "N_Subjects" # ) # ) %>% # dplyr::mutate(data_descr = dplyr::case_when( # (Count > 1 & N_Subjects == 1) ~ "Individual Data, Multiple Observations", # (Count == 1 & N_Subjects == 1) ~ "Individual Data, Single Observation", # (Count == 1 & N_Subjects > 1 & Conc_SD > 0) ~ "Grouped Data w SD", # (N_Subjects > 1 & Conc_SD == 0) ~ "Grouped Data no SD", # .default = NA_character_ # )) # # indiv_data <- subset(data_counts, # subset = (data_descr %in% "Individual Data, Multiple Observations") # ) # # indiv_data <- indiv_data %>% # dplyr::group_by(Chemical, Species, Reference, Route, Media, Dose, Time) %>% # dplyr::mutate(replicate_mean = mean(Conc, na.rm = TRUE)) %>% # ungroup() # # indiv_data_bygroup <- indiv_data %>% # dplyr::group_by(Chemical, Species) %>% # dplyr::summarise( # AFE = 10^mean(log10(Conc / replicate_mean)), # AAFE = 10^mean(abs(log10(Conc / replicate_mean))) # ) %>% # ungroup() # # indiv_data_bygroup %>% count(AFE < 0.5) # indiv_data_bygroup %>% count(AFE > 2) ## ----------------------------------------------------------------------------- # my_tf <- my_pk %>% twofold_test() ## ----------------------------------------------------------------------------- # pl3A <- my_tf$individual_data %>% # # Plotting # ggplot(aes(x = foldConc)) + # geom_histogram( # binwidth = 0.1, # fill = "grey5", # color = "grey5", # linewidth = 0.4 # ) + # coord_cartesian( # xlim = c(0, 4), # ylim = c(0, 3000) # ) + # scale_y_continuous( # expand = c(0, 0), # breaks = c(0, 250, 500, 750, 1000, 1500, 2000, 2500, 3000), # labels = c("0", "2.5", "5", "7.5", "10", "15", "20", "25", "") # ) + # expand_limits(y = 0) + # geom_vline( # xintercept = c(0.5, 2), # color = "red3", # linetype = "dashed", # linewidth = 1 # ) + # theme_bw() + # labs( # x = "Mean-normalized concentration", # y = "Observations (hundreds))" # ) + # theme( # text = element_text(size = 14), # panel.border = element_rect(color = "black", linewidth = 1, fill = NA), # panel.background = element_blank(), # panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4), # axis.title = element_text(face = "bold"), # axis.line = element_blank(), # axis.text = element_text(size = 14), # plot.background = element_blank(), # axis.ticks.y = element_blank() # ) # # pl3A ## ----------------------------------------------------------------------------- # # get time of peak concentration for use in calculated ADME-normalized time # my_nca <- nca(my_pk) %>% # dplyr::filter(param_name %in% "tmax") # # pl3B <- my_tf$individual_data %>% # inner_join(my_nca %>% # dplyr::select(Chemical, Species, # Route, Media, # tmax = param_value # ) %>% # filter(!is.na(tmax))) %>% # group_by( # Chemical, # Species, # Reference, # Route, # Media, # Dose # ) %>% # mutate( # normTime = ifelse( # Route == "iv", # 1 + Time / max(Time), # ifelse( # Time > tmax, # ((Time - tmax) / max(Time)) + 1, # 0.5 + Time / (2 * tmax) # ) # ) # ) %>% # ggplot(aes( # x = normTime, # y = foldConc # )) + # scale_x_continuous( # breaks = c(1, 2), # labels = c("tmax", "end") # ) + # geom_point(alpha = 0.1, size = 0.7) + # geom_hline( # yintercept = c(0.5, 2), # color = "red3", # linewidth = 0.8, # linetype = "dashed" # ) + # facet_grid( # cols = vars(Route), # scales = "free_x" # ) + # labs( # x = "ADME-Normalized Time", # y = "Mean-Normalized\nConcentration" # ) + # theme( # text = element_text(size = 14), # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # axis.line = element_blank(), # axis.text = element_text(size = 14), # axis.title = element_text(face = "bold"), # panel.spacing = unit(0.125, units = "in"), # plot.background = element_blank(), # legend.position = "none" # ) # # pl3B ## ----------------------------------------------------------------------------- # pl3 <- cowplot::plot_grid(pl3A, # pl3B, # ncol = 2, # rel_widths = c(1, 2), # labels = c("A", "B") # ) # # # save PDF version # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig3.pdf" # ), # pl3, # bg = "white", # width = 8, # height = 4, # units = "in" # ) # # # Save PNG version # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig3.png" # ), # pl3, # bg = "white", # width = 8, # height = 4, # units = "in", # dpi = 300 # ) # # # remove unneeded objects # rm(pl3A, pl3B, pl3) ## ----------------------------------------------------------------------------- # plsupp3 <- my_tf$individual_data %>% # inner_join(my_nca %>% # dplyr::select(Chemical, Species, # Route, Media, # tmax = param_value # ) %>% # filter(!is.na(tmax))) %>% # group_by( # Chemical, # Species, # Reference, # Route, # Media, # Dose # ) %>% # mutate( # normTime = ifelse( # Route == "iv", # 1 + Time / max(Time), # ifelse( # Time > tmax, # ((Time - tmax) / max(Time)) + 1, # 0.5 + Time / (2 * tmax) # ) # ) # ) %>% # ggplot(aes( # x = normTime # )) + # scale_x_continuous( # breaks = c(1, 2), # labels = c("tmax", "end") # ) + # geom_histogram() + # facet_grid( # cols = vars(Route), # scales = "free_x" # ) + # labs( # x = "ADME-Normalized Time", # "# Observations" # ) + # theme( # text = element_text(size = 14), # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # # axis.ticks = element_blank(), # axis.line = element_blank(), # # axis.text.x = element_blank(), # axis.text = element_text(size = 14), # axis.title = element_text(face = "bold"), # panel.spacing = unit(0.125, units = "in"), # plot.background = element_blank(), # legend.position = "none" # ) # # # save PDF version # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig3", # ".png" # ), # plsupp3, # bg = "white", # width = 8, # height = 4, # units = "in", # dpi = 300 # ) # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig3", # ".pdf" # ), # plsupp3, # bg = "white", # width = 8, # height = 4, # units = "in" # ) ## ----Winmodelfn--------------------------------------------------------------- # # Evaluation # # Winning Model Tally # winmodel_comparison <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # current_rds <- readRDS(file = file_str) # # # Wide winning model # suppressMessages({ # winning_model <- get_winning_model(obj = current_rds) # # wide_winning_model <- winning_model %>% # dplyr::group_by(method, model) %>% # dplyr::count() %>% # tidyr::pivot_wider( # names_from = model, # values_from = n # ) # }) # return(wide_winning_model) # } ## ----------------------------------------------------------------------------- # AUC_plot <- function( # this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Evaluation for: ", pk_name)) # current_rds <- readRDS(file = file_str) # # Cmax and AUC_inf RMSEs # # note that eval_tkstats already filters winning models # # use finite_only = FALSE so that we can count the excluded cases # suppressMessages(RMSLE_eval <- eval_tkstats( # obj = current_rds, # dose_norm = FALSE, # finite_only = FALSE, # suppress.messages = TRUE # ) %>% # mutate(Options = pk_name) %>% # dplyr::select( # Options, # Chemical, # Species, # Route, # Media, # Reference, # method, # model, # starts_with("Cmax"), # starts_with("AUC_") # )) # # these_opts <- paste0( # dose_indic, # log10_indic, # time_indic, # errmodel_indic # ) # # # plot AUC infinity for tkstats vs. AUC infinity for NCA # # show identity line and 20x above and below lines # AUC_plot <- RMSLE_eval %>% # dplyr::filter(is.finite(AUC_infinity.nca) & is.finite(AUC_infinity.tkstats)) %>% # ggplot() + # geom_point(aes( # x = AUC_infinity.nca, # y = AUC_infinity.tkstats, # color = model # )) + # scale_x_log10() + # scale_y_log10() + # annotation_logticks() + # geom_abline(aes(intercept = 0, slope = 1)) + # geom_abline(aes(intercept = log10(20), slope = 1), # linetype = 2 # ) + # geom_abline(aes(intercept = log10(1 / 20), slope = 1), # linetype = 2 # ) + # facet_grid(rows = vars(method)) + # ggtitle(paste( # these_opts, # "AUC_inf pred vs. AUC_inf NCA" # )) + # scale_color_manual( # values = c( # "model_1comp" = "#0398FC", # "model_2comp" = "#D68E09", # "model_flat" = "black" # ), # name = "Winning model" # ) + # coord_equal() # # Cmax_plot <- RMSLE_eval %>% # dplyr::filter(is.finite(Cmax.nca) & is.finite(Cmax.tkstats)) %>% # ggplot() + # geom_point(aes( # x = Cmax.nca, # y = Cmax.tkstats, # color = model # )) + # scale_x_log10() + # scale_y_log10() + # annotation_logticks() + # geom_abline(aes(intercept = 0, slope = 1)) + # geom_abline(aes(intercept = log10(20), slope = 1), # linetype = 2 # ) + # geom_abline(aes(intercept = log10(1 / 20), slope = 1), # linetype = 2 # ) + # facet_grid(rows = vars(method)) + # ggtitle(paste( # these_opts, # "Cmax pred vs. Cmax NCA" # )) + # scale_color_manual( # values = c( # "model_1comp" = "#0398FC", # "model_2comp" = "#D68E09", # "model_flat" = "black" # ), # name = "Winning model" # ) + # coord_equal() # # return( # list( # AUC_plot = AUC_plot, # Cmax_plot = Cmax_plot # ) # ) # } ## ----------------------------------------------------------------------------- # all_AUC_plots <- fitopts %>% # dplyr::rowwise() %>% # dplyr::summarise(this_plot = list(AUC_plot( # this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ))) # # save the plots # pdf( # file = paste0( # Sys.getenv("FIG_DIR"), # today, # "_AUCinf_nca_vs_tk_plots.pdf" # ), # onefile = TRUE, # height = 5, # width = 7 # ) # # plotlist <- all_AUC_plots %>% # pull(this_plot) # # for (x in seq_along(plotlist)) { # print(plotlist[[x]][["AUC_plot"]]) # } # dev.off() # # pdf( # file = paste0( # Sys.getenv("FIG_DIR"), # today, # "_Cmax_nca_vs_tk_plots.pdf" # ), # onefile = TRUE, # height = 5, # width = 7 # ) # # plotlist <- all_AUC_plots %>% # pull(this_plot) # # for (x in seq_along(plotlist)) { # print(plotlist[[x]][["Cmax_plot"]]) # } # dev.off() # # rm(plotlist, all_AUC_plots) ## ----RMSLE_Cmax_AUC_fun------------------------------------------------------- # # RMSLE for Cmax and AUC # rmsles_Cmax_AUC <- function( # this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Evaluation for: ", pk_name)) # current_rds <- readRDS(file = file_str) # # Cmax and AUC_inf RMSEs # # note that eval_tkstats already filters winning models # # use finite_only = FALSE so that we can count the excluded cases # suppressMessages(RMSLE_eval <- eval_tkstats( # obj = current_rds, # dose_norm = FALSE, # finite_only = FALSE, # suppress.messages = TRUE # ) %>% # mutate(Options = pk_name) %>% # dplyr::select( # Options, # Chemical, # Species, # Route, # Media, # Reference, # method, # model, # starts_with("Cmax"), # starts_with("AUC_") # )) # # # Couunt bad observations: # # zero, NA, infinite # RMSLE_badobs <- RMSLE_eval %>% # group_by(Options, method) %>% # summarise( # total_expts = n(), # count_AUCinf_NCA_0 = sum(AUC_infinity.nca %in% 0), # count_AUCinf_pred_0 = sum(AUC_infinity.tkstats %in% 0), # count_AUCinf_NCA_isNA = sum(is.na(AUC_infinity.nca)), # count_AUCinf_pred_isNA = sum(is.na(AUC_infinity.tkstats)), # count_AUCinf_NCA_isInf = sum(is.infinite(AUC_infinity.nca)), # count_AUCinf_pred_isInf = sum(is.infinite(AUC_infinity.tkstats)), # count_Cmax_NCA_0 = sum(Cmax.nca %in% 0), # count_Cmax_pred_0 = sum(Cmax.tkstats %in% 0), # count_Cmax_NCA_isNA = sum(is.na(Cmax.nca)), # count_Cmax_pred_isNA = sum(is.na(Cmax.tkstats)), # count_Cmax_NCA_isInf = sum(is.infinite(Cmax.nca)), # count_Cmax_pred_isInf = sum(is.infinite(Cmax.tkstats)) # ) %>% # ungroup() # # # count cases where AUC or Cmax was negative, # # or where AUC_inf was greater than 1e7 # RMSLE_neg <- RMSLE_eval %>% # dplyr::filter( # is.finite(AUC_infinity.nca), # is.finite(AUC_infinity.tkstats), # is.finite(Cmax.nca), # is.finite(Cmax.tkstats) # ) %>% # group_by(Options, method) %>% # summarise( # count_AUCinf_NCA_lt0 = sum(AUC_infinity.nca < 0), # count_AUCinf_pred_lt0 = sum(AUC_infinity.tkstats < 0), # count_Cmax_NCA_lt0 = sum(AUC_infinity.nca < 0), # count_Cmax_pred_lt0 = sum(AUC_infinity.tkstats < 0), # count_AUCinf_NCA_gt_1e7 = sum(AUC_infinity.nca > 1e7), # count_AUCinf_pred_gt_1e7 = sum(AUC_infinity.tkstats > 1e7) # ) # # RMSLE_badobs <- RMSLE_badobs %>% # left_join(RMSLE_neg, by = c( # "Options", # "method" # )) %>% # rowwise() %>% # dplyr::mutate(total_excl = sum( # c_across( # starts_with("count") # ) # )) # # # compute RMSLEs after excluding the bad observations # RMSLE_eval <- RMSLE_eval %>% # dplyr::filter( # is.finite(AUC_infinity.nca), # is.finite(AUC_infinity.tkstats) # ) %>% # dplyr::filter( # AUC_infinity.nca > 0, # AUC_infinity.tkstats > 0, # AUC_infinity.nca < 1e7, # AUC_infinity.tkstats < 1e7 # ) %>% # rowwise() %>% # mutate( # SLE_Cmax = (log10(Cmax.tkstats) - log10(Cmax.nca))^2, # SLE_AUC_inf = (log10(AUC_infinity.tkstats) - log10(AUC_infinity.nca))^2 # ) %>% # dplyr::filter(!is.na(SLE_Cmax), !is.na(SLE_AUC_inf)) %>% # group_by(Options, method) %>% # summarise( # n_expts = n(), # RMSLE_Cmax = sqrt(mean(SLE_Cmax, na.rm = FALSE)) %>% signif(digits = 4), # RMSLE_AUC = sqrt(mean(SLE_AUC_inf, na.rm = FALSE)) %>% signif(digits = 4) # ) %>% # ungroup() # # # merge in counts of bad obs # # RMSLE_eval <- RMSLE_eval %>% # dplyr::left_join(RMSLE_badobs, # by = c("Options", "method") # ) # # return(RMSLE_eval) # } ## ----gof_tbl_fun-------------------------------------------------------------- # # Goodness-of-fit table with R-squared, RMSLE and AIC # get_gof <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # current_rds <- readRDS(file = file_str) # winning_model <- suppressMessages(get_winning_model(obj = current_rds)) # # suppressMessages({ # this_rsq <- rsq.pk(current_rds, # use_scale_conc = FALSE, # rsq_group = ggplot2::vars(Chemical, Species), # sub_pLOQ = TRUE # ) %>% # semi_join(winning_model) # # this_AIC <- AIC(current_rds) %>% # semi_join(winning_model) # # this_rmsle <- rmse.pk(current_rds, # rmse_group = vars(Chemical, Species), # use_scale_conc = list( # dose_norm = FALSE, # log10_trans = TRUE # ), # sub_pLOQ = TRUE # ) %>% # semi_join(winning_model) # }) # # # Since they are all joined by winmodel # # all three must have same NROW # message( # paste0("For ", pk_name, "... outputs below must have same number of rows") # ) # # message(paste0("rsq: ", NROW(this_rsq))) # message(paste0("aic: ", NROW(this_AIC))) # message(paste0("rmsle: ", NROW(this_rmsle))) # # gof_df <- this_rsq %>% # inner_join(this_AIC, by = join_by(Chemical, Species, method, model)) %>% # inner_join(this_rmsle, by = join_by(Chemical, Species, method, model)) %>% # mutate(Options = pk_name, .before = Chemical) # # return(gof_df) # } ## ----allpreds_fun------------------------------------------------------------- # # All predictions # get_all_preds <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # # current_rds <- readRDS(file = file_str) # # # Wide winning model # suppressMessages({ # winning_model <- get_winning_model(obj = current_rds) %>% # select(-c(near_flat, preds_below_loq)) # this_preds <- predict(current_rds, # use_scale_conc = FALSE # ) %>% # semi_join(winning_model) %>% # mutate(Options = pk_name, .before = Chemical) # }) # return(this_preds) # } ## ----factor2fun--------------------------------------------------------------- # # Factor of two model error # tf_tests <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # current_rds <- readRDS(file = file_str) # print(paste0("Now analyzing: ", pk_name)) # # out <- twofold_test(current_rds, # sub_pLOQ = TRUE, # suppress_messages = TRUE # ) # # out <- out$model_error_summary %>% # mutate(Options = pk_name) # # return(out) # } ## ----do_winmodel-------------------------------------------------------------- # # winning model # system.time(wm_comp <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(winmodel_comp = list( # winmodel_comparison( # this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ) # )) %>% # tidyr::unnest(winmodel_comp)) ## ----do_RMSLE_Cmax_AUC-------------------------------------------------------- # # RMSLE for Cmax and AUC # system.time(cmax_auc_rmsle_tbl <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(nca_comp = list( # rmsles_Cmax_AUC( # this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ) # )) %>% # tidyr::unnest(nca_comp)) ## ----do_gof_tbl--------------------------------------------------------------- # # GOF table # system.time(gof_tbl <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(gof_win = list( # get_gof( # this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ) # )) %>% # tidyr::unnest(gof_win)) ## ----do_factor_two------------------------------------------------------------ # # factor of two # system.time(all_tf_tests <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(TF = list( # tf_tests(error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ) # )) %>% # tidyr::unnest(TF)) ## ----do_all_preds------------------------------------------------------------- # # all predictions # system.time(all_preds <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(these_preds = list( # get_all_preds( # this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ) # )) %>% # tidyr::unnest(these_preds)) ## ----write_preds-------------------------------------------------------------- # # Save preds to a file as well # saveRDS(all_preds, paste0( # Sys.getenv("FIG_DIR"), # today, # "_all_preds.Rds" # )) # # rm(all_preds) ## ----------------------------------------------------------------------------- # (win_mdl_count_by_opts <- gof_tbl %>% # group_by(Options, method, model) %>% # count() %>% # pivot_wider( # names_from = model, # values_from = n # )) ## ----------------------------------------------------------------------------- # gof_tbl %>% # group_by(Options, method) %>% # count(RMSLE < 1) %>% # filter(`RMSLE < 1`) %>% # arrange(desc(n)) %>% # print(n = 8) # # bobyqa-optimized options 010h, 110h, 110p ## ----------------------------------------------------------------------------- # gof_tbl %>% # group_by(Options, method) %>% # count(Rsq > 0.75) %>% # filter(`Rsq > 0.75`) %>% # arrange(desc(n)) %>% # print(n = 8) ## ----------------------------------------------------------------------------- # rmsle_rsq_rank <- gof_tbl %>% # group_by(Options, method) %>% # count(RMSLE < 1 & Rsq > 0.75) %>% # filter(`RMSLE < 1 & Rsq > 0.75`) %>% # arrange(desc(n)) %>% # dplyr::rename(`# Chemical-Species data groups` = n) # # rmsle_rsq_rank %>% print(n = 8) ## ----------------------------------------------------------------------------- # cmax_auc_rmsle_rank <- cmax_auc_rmsle_tbl %>% # arrange(RMSLE_Cmax, RMSLE_AUC) %>% # select(Options, method, RMSLE_Cmax, RMSLE_AUC) # # cmax_auc_rmsle_rank %>% # slice_head(n = 5) ## ----write_eval_results------------------------------------------------------- # # Write the results to file # writexl::write_xlsx( # x = list( # Fit_Counts = wm_comp, # AUC_and_Cmax_RMSLE_summary = cmax_auc_rmsle_tbl, # Prediction_Evaluations = gof_tbl, # Twofold_Tests = all_tf_tests, # RMSLE_Rsq_rank = rmsle_rsq_rank, # Cmax_AUC_RMSLE_rank = cmax_auc_rmsle_rank # ), # path = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table2_eval_results", # ".xlsx" # ) # ) # # rm(wm_comp, cmax_auc_rmsle_tbl, gof_tbl, all_tf_tests) # # gc() ## ----plot_rsq_rmsle----------------------------------------------------------- # gof_tbl <- readxl::read_excel( # path = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table2_eval_results", # ".xlsx" # ), # sheet = "Prediction_Evaluations" # ) # # gof_tbl <- gof_tbl %>% # dplyr::mutate( # opts = substr(Options, 1, 3), # model_text = dplyr::case_when(model %in% "model_1comp" ~ "1comp", # model %in% "model_2comp" ~ "2comp", # model %in% "model_flat" ~ "flat", # .default = NA_character_ # ) # ) # # fig_rsq_rmsle <- ggplot(gof_tbl) + # geom_point(aes( # x = RMSLE, # y = Rsq, # color = model_text # )) + # facet_grid( # rows = vars(opts), # cols = vars(interaction(error_model, method)) # ) + # scale_color_manual( # values = c( # "1comp" = "#0398FC", # "2comp" = "#D68E09", # "flat" = "black" # ), # name = "Winning model" # ) + # theme_bw() # # fig_rsq_rmsle ## ----plot_rsq_rmsle_zoom------------------------------------------------------ # fig_rsq_rmsle_zoom <- ggplot(gof_tbl) + # geom_point(aes( # x = RMSLE, # y = Rsq, # color = model_text # )) + # facet_grid( # rows = vars(opts), # cols = vars(interaction(error_model, method)) # ) + # scale_color_manual( # values = c( # "1comp" = "#0398FC", # "2comp" = "#D68E09", # "flat" = "black" # ), # name = "Winning model" # ) + # coord_cartesian(xlim = c(0, 1)) + # theme_bw() # # fig_rsq_rmsle_zoom ## ----------------------------------------------------------------------------- # # PDF versions # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig4", # ".pdf" # ), # fig_rsq_rmsle, # width = 11, # height = 8.5 # ) # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig5", # ".pdf" # ), # fig_rsq_rmsle_zoom, # width = 11, # height = 8.5 # ) # # # PNG versions # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig4", # ".png" # ), # fig_rsq_rmsle, # width = 11, # height = 8.5 # ) # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig5", # ".png" # ), # fig_rsq_rmsle_zoom, # width = 11, # height = 8.5 # ) # # rm(fig_rsq_rmsle_zoom, fig_rsq_rmsle, gof_tbl) ## ----plot_cmax_auc_rmsle------------------------------------------------------ # # read in table # cmax_auc_rmsle_tbl <- readxl::read_xlsx( # path = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table2_eval_results.xlsx" # ), # sheet = "AUC_and_Cmax_RMSLE_summary" # ) # # make plot # cmax_auc_rmsle_tbl <- cmax_auc_rmsle_tbl %>% # dplyr::mutate(opts = substr(Options, 1, 3)) # # cmax_auc_rmsle_fig <- ggplot(cmax_auc_rmsle_tbl) + # geom_point( # aes( # x = RMSLE_AUC, # y = RMSLE_Cmax, # color = opts # ), # size = 4 # ) + # facet_grid( # rows = vars(error_model), # cols = vars(method) # ) + # scale_color_brewer( # palette = "Paired", # name = "Fit options" # ) + # theme_bw() # # cmax_auc_rmsle_fig # # # save plot # # PDF version # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig6", # ".pdf" # ), # cmax_auc_rmsle_fig, # width = 7, # height = 5 # ) # # # PNG version # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig6", # ".png" # ), # cmax_auc_rmsle_fig, # width = 7, # height = 5, # units = "in", # dpi = 300 # ) # # rm(cmax_auc_rmsle_fig, cmax_auc_rmsle_tbl) ## ----------------------------------------------------------------------------- # all_preds <- readRDS(paste0( # Sys.getenv("FIG_DIR"), # today, # "_all_preds.Rds" # )) # # # select winning fit opts: 110p bobyqa # # all_preds_prep <- all_preds %>% # filter( # Options %in% c("110p"), # method %in% "bobyqa" # ) %>% # mutate( # Conc_est_sub = dplyr::if_else(Conc_est < pLOQ, # pLOQ, # Conc_est # ), # Conc_est_belowLOQ = dplyr::if_else(Conc_est < pLOQ, # TRUE, # FALSE # ), # ID = as.factor(paste(Chemical, Chemical_Name, Species)), # .keep = "unused" # ) %>% # select( # ID, Options, # dose_norm, log10_trans, time_scale, # Route, Media, Dose, # Conc, Conc_est_sub, Conc_est_belowLOQ, # Time, Reference # ) # # split_preds <- split(all_preds_prep, all_preds_prep$ID) # # sp_plots <- lapply(names(split_preds), function(i) { # ggplot( # data = split_preds[[i]], # mapping = aes( # x = Conc, # y = Conc_est_sub, # color = as.factor(Dose), # shape = Reference # ) # ) + # xlab("Observed conc, mg/L") + # ylab("Predicted conc, mg/L") + # geom_point(size = 2) + # geom_abline(slope = 1) + # facet_grid(Route ~ Media, # scales = "free_y" # ) + # scale_y_log10() + # scale_x_log10() + # annotation_logticks(sides = "bl") + # scale_color_viridis_d(name = "Dose, mg/kg") + # theme_bw() + # labs(title = i) + # theme( # legend.position = "bottom", # plot.title = element_text(hjust = 0.5, face = "bold") # ) # }) # # # Very BIG FILE # pdf( # file = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_log10_predvsobs_plots_110pbobyqa_all.pdf" # ), # onefile = TRUE, height = 8, width = 8 # ) # for (x in seq_along(sp_plots)) { # print(sp_plots[[x]]) # } # dev.off() # # rm(sp_plots, split_preds, all_preds_prep, all_preds) ## ----------------------------------------------------------------------------- # my_pk <- readRDS(paste0( # Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_110p.Rds" # )) ## ----data_aggregation--------------------------------------------------------- # # after fitting pk object for all cvt objects or reading the fitted pk object in `setup` # winmodel <- get_winning_model(my_pk, method = "bobyqa") # my_preds <- semi_join(predict(my_pk, use_scale_conc = FALSE, method = "bobyqa"), winmodel) # my_residuals <- residuals(my_pk, use_scale_conc = FALSE, method = "bobyqa") %>% # semi_join(winmodel) # my_tkstats <- eval_tkstats(my_pk, method = "bobyqa") %>% semi_join(winmodel) # my_nca <- get_nca(my_pk) # all_my_data <- get_data(my_pk) # # # merge together long form coefs and long form coef SDs # # my_tf <- twofold_test(my_pk, method = "bobyqa") # # NROW(all_my_data) > NROW(my_preds) # THis must be true... or else try distinct(my_preds) # # my_coef_sd <- coef_sd(my_pk, method = "bobyqa") %>% # this will take a few minutes to run # semi_join(winmodel) # keep only coefs & SDs for winning model # # # Writing file to xlsx # writexl::write_xlsx( # x = list( # predictions = my_preds, # tkstats = my_tkstats, # coefs_sds = my_coef_sd # ), # path = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table3", # ".xlsx" # ) # ) ## ----------------------------------------------------------------------------- # fe_df <- fold_error(my_pk, # sub_pLOQ = TRUE, # method = "bobyqa" # ) %>% semi_join(winmodel) # # r2_df <- rsq(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE, # method = "bobyqa" # ) %>% # semi_join(winmodel) # # myAIC <- AIC(my_pk, method = "bobyqa") %>% # semi_join(winmodel) # # myRMSLE <- rmse(my_pk, # use_scale_conc = list( # dose_norm = FALSE, # log10_trans = TRUE # ), # sub_pLOQ = TRUE, # method = "bobyqa" # ) %>% # semi_join(winmodel) # # myRMSE <- rmse(my_pk, # rmse_group = ggplot2::vars( # Chemical, # Species, # Route, # Media, # Dose # ), # sub_pLOQ = TRUE, # method = "bobyqa" # ) %>% # semi_join(winmodel) ## ----------------------------------------------------------------------------- # my_tf$model_error_all %>% # filter(model %in% c("model_1comp", "model_2comp")) %>% # mutate(Route.model = factor(paste(Route, model), # levels = c( # "iv model_1comp", # "iv model_2comp", # "oral model_1comp", # "oral model_2comp" # ) # )) %>% # ggplot(aes( # x = Fold_Error, # fill = Route.model # )) + # geom_histogram( # color = NA, # position = position_stack(), # bins = 50 # ) + # scale_x_log10(labels = scales::label_math(format = log10)) + # annotation_logticks(sides = "b") + # scale_fill_brewer( # palette = "Paired", # name = "Route/winning model" # ) + # expand_limits(y = 0) + # geom_vline(xintercept = c(0.5, 2), color = "black", linetype = "dashed") + # labs( # x = "Predicted/Observed", # y = "Number of Observations" # ) + # theme_classic() + # theme( # aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA), # panel.grid.major = element_line(color = "grey95", linewidth = 0.4) # ) + # coord_cartesian(xlim = c(10^(-1.5), 100)) # # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig7_PredictedObserved_compartmentModels.png" # ), # height = 5, width = 7 # ) # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig7_PredictedObserved_compartmentModels.pdf" # ), # height = 5, width = 7 # ) ## ----fig4-modelPerform-v-dataVar---------------------------------------------- # my_tf$indiv_data_test_fold_errors %>% # select(model, starts_with("percent_")) %>% # glimpse() # # pl_4A <- my_tf$indiv_data_fold_errors %>% # # For Plotting # ggplot(aes( # y = Fold_Error, # x = foldConc # )) + # geom_bin2d(bins = 55, color = NA) + # geom_hline(yintercept = c(0.5, 2), linetype = "dashed") + # geom_vline(xintercept = c(0.5, 2), linetype = "dashed") + # scale_x_log10( # labels = scales::label_math(format = log10), # limits = c(0.001, 20) # ) + # scale_y_log10( # labels = scales::label_math(format = log10), # limits = c(0.0001, 10000) # ) + # annotation_logticks(sides = "bl") + # scale_fill_viridis_c( # option = "cividis", # limits = c(1, 100), # oob = scales::oob_squish # ) + # labs( # y = "Model Error", # x = "Data Variability", # fill = "Count" # ) + # theme( # aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1.5), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # strip.background = element_rect(fill = "white"), # strip.text = element_text(face = "bold"), # axis.ticks = element_blank(), # axis.line = element_blank(), # axis.text = element_text(size = 14), # axis.title = element_text(size = 14) # ) # # pl_4A # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # today, # "_Fig4A_ModelPerformance_vDataVariability.png" # ), # height = 5, # width = 7, # device = "png", # dpi = 300, # units = "in" # ) ## ----------------------------------------------------------------------------- # my_table <- my_tf$indiv_data_test_fold_errors %>% # ungroup() %>% # filter(model %in% "All") %>% # select(starts_with("percent_")) %>% # t() %>% # round(digits = 2) %>% # as.data.frame() %>% # rownames_to_column(var = "percentages") %>% # mutate(percentages = case_when( # percentages == "percent_both_within" ~ "Both within a factor of 2", # percentages == "percent_both_outside" ~ "Both outside a factor of 2", # percentages == "percent_model_outside" ~ "Model too variable", # percentages == "percent_data_outside" ~ "Data too variable" # )) # # names(my_table) <- c(" ", "Overall (%)") # # flextable(my_table) %>% # border_inner() %>% # fontsize(part = "all", size = 11) %>% # bold(part = "all", j = 2) %>% # autofit() # # table_plot <- gen_grob( # flextable(my_table) %>% # border_inner() %>% # fontsize(part = "all", size = 10) %>% # bold(part = "all", j = 2) %>% # autofit(), # autowidths = TRUE, # fit = "width" # ) # # # dual_plot <- plot_grid(pl_4A, # table_plot, # ncol = 1, # align = "hv", # rel_heights = c(1, 0.5), # rel_widths = c(1, 0.5) # ) # # dual_plot # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig4.png" # ), # plot = dual_plot, # height = 4.5, # width = 6, # device = "png", # bg = "white", # dpi = 300, # units = "in" # ) ## ----fig5-goodness-of-fit----------------------------------------------------- # combined_gof_df <- my_tf$model_error_all %>% # group_by(Chemical, Species, model, method) %>% # summarize(within_2fold = sum(between(Fold_Error, 0.5, 2)) / n()) %>% # left_join(r2_df) %>% # left_join(myAIC) %>% # left_join(myRMSLE) %>% # semi_join(winmodel) # # # mypl <- ggplot( # data = combined_gof_df, # aes( # x = within_2fold, # y = Rsq, # color = model # ) # ) + # geom_point() + # geom_abline(slope = 1, color = "black", linetype = "longdash") + # scale_color_manual(values = c("#0398FC", "#D68E09", "black")) + # # coord_cartesian(xlim = c(0,1), ylim = c(0,1)) + # labs( # x = "Fraction of predictions within 2-fold", # y = "R-squared value" # ) + # theme( # aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank(), # legend.position = "none", # legend.title = element_blank(), # legend.key = element_blank() # ) # # panelA_plot <- ggExtra::ggMarginal(mypl, # groupFill = TRUE, # type = "histogram", # xparams = list(binwidth = 0.05), # yparams = list(binwidth = 0.05) # ) # panelA_plot # # panelB_plot <- ggplot( # data = myRMSE, # aes( # x = RMSE, # fill = model # ) # ) + # scale_x_log10() + # annotation_logticks(sides = "b") + # geom_histogram( # bins = 50, # position = position_stack(), # color = NA # ) + # labs(y = "Count") + # scale_fill_manual(values = c("#0398FC", "#D68E09", "grey10")) + # scale_color_manual(values = c("#0398FC", "#D68E09", "grey10"), guide = "none") + # theme( # text = element_text(size = 10), # # aspect.ratio = 1, # panel.border = element_rect(color = "black", fill = NA, size = 1), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank(), # strip.background = element_blank(), # panel.spacing.y = unit(0.125, units = "in"), # legend.title = element_blank(), # legend.position = "bottom" # ) # panelB_plot # # plAB <- patchwork::wrap_elements(panelA_plot) + panelB_plot + # plot_annotation(tag_levels = "A") + plot_layout(guides = "collect", # widths = c(1, 0.9)) & theme(legend.position = "bottom") & guides(color = "none") # # # save PNG version # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig5.png" # ), # plAB, # height = 4, # width = 6.5, # bg = "white", # units = "in" # ) # # # save PDF version # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig5.pdf" # ), # plAB, # height = 4, # width = 6.5, # bg = "white" # ) ## ----goodness-of-fit_exampleFits---------------------------------------------- # pl <- plot(my_pk, # method = "bobyqa", # best_fit = TRUE, # use_scale_conc = list( # dose_norm = TRUE, # log10_trans = FALSE # ), # drop_nonDetect = FALSE # ) # # cvt %>% # filter(curation_set_tag == "CVT_Showa") %>% # pull(analyte_dtxsid) -> Showa_chems # # # my_rsq <- rsq(my_pk, method = "bobyqa") # my_rsq %>% # semi_join(winmodel) %>% # filter(Chemical %in% c( # "DTXSID2020139", # "DTXSID4023917", # "DTXSID3061635", # "DTXSID8030760" # )) # # # High rsq: DTXSID2020139 and DTXSID4023917 # # Low rsq: DTXSID3061635 and DTXSID8030760 # # fe_df %>% # filter(Chemical %in% c( # "DTXSID2020139", # "DTXSID4023917", # "DTXSID3061635", # "DTXSID8030760" # ), Species %in% "rat") %>% # group_by(Chemical, Species) %>% # summarise(frac_2fold = sum(Fold_Error < 2 & Fold_Error > 0.5) / n()) # # # # A tibble: 4 × 3 # # # Groups: Chemical [4] # # Chemical Species frac_2fold # # # # 1 DTXSID2020139 rat 0.271 # # 2 DTXSID3061635 rat 0.125 # # 3 DTXSID4023917 rat 0.953 # # 4 DTXSID8030760 rat 0.778 # # # High frac 2fold: DTXSID4023917 and DTXSID8030760 # # Low frac 2fold: DTXSID2020139 and DTXSID3061635 # # pl_sub <- pl %>% # filter( # Chemical %in% c( # "DTXSID2020139", # "DTXSID4023917", # "DTXSID3061635", # "DTXSID8030760" # ), # Species %in% "rat" # ) # # ex_fits <- pl_sub %>% # pull(final_plot) # ex_fits <- set_names( # ex_fits, # pl_sub$Chemical # ) # # # # cowplot::plot_grid( # plotlist = list( # # Low frac 2fold and high Rsq # ex_fits[["DTXSID2020139"]] + # scale_color_manual(values = c( # "#a6bddb", # "#74a9cf", # "#41bbc4", # "#2b8cbe", # "#045a8d" # )) + # theme(legend.position = "none") + # xlim(0, 30) + # ggtitle("A: DTXSID2020139 rat"), # # high frac 2fold and high Rsq # ex_fits[["DTXSID4023917"]] + # scale_color_manual(values = c( # "black", # "grey40" # )) + # theme(legend.position = "none") + # ggtitle("B: DTXSID4023917 rat"), # # # low frac 2fold and low rsq # ex_fits[["DTXSID3061635"]] + # scale_color_manual(values = c("#006837")) + # theme(legend.position = "none") + # ggtitle("C: DTXSID3061635 rat"), # # # high frac2fold and low rsq # ex_fits[["DTXSID8030760"]] + # scale_color_manual(values = "magenta3") + # theme(legend.position = "none") + # ggtitle("D: DTXSID8030760 rat") # ), # scale = 0.85 # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig7.png" # ), # height = 12, # width = 12 # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig7.pdf" # ), # height = 12, # width = 12 # ) ## ----------------------------------------------------------------------------- # flat_chems <- winmodel %>% # filter(model %in% "model_flat") %>% # distinct(Chemical, Species) # print(flat_chems) # # pl <- plot(my_pk, # method = "bobyqa", # best_fit = FALSE, # use_scale_conc = list( # dose_norm = TRUE, # log10_trans = FALSE # ), # drop_nonDetect = FALSE # ) # # flat_fits <- pl %>% # inner_join(flat_chems) %>% # pull(final_plot) # # cowplot::plot_grid(plotlist = flat_fits) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig8.pdf" # ), # height = 8, # width = 16, # units = "in" # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig8.png" # ), # bg = "white", # height = 8, # width = 16, # dpi = 300, # units = "in" # ) ## ----Lombardo_Comparison------------------------------------------------------ # cvt_DTXSIDs <- unique(my_pk$data$Chemical) # # First things first, let's load the data from Lombardo et al. # lombardo <- readxl::read_xlsx( # paste0( # Sys.getenv("OD_DIR"), # "Lombardo2018-Supplemental_82966_revised_corrected.xlsx" # ), # skip = 8 # ) # # # use ctxR to search the Dashboard for DTXSIDs corresponding to these CASRNs # lombardo_dtxsid <- ctxR::chemical_equal_batch( # word_list = unique(lombardo$`CAS #`) # ) # # # merge in the DTXSIDs # lombardo <- lombardo %>% # dplyr::left_join( # lombardo_dtxsid$valid %>% # dplyr::select(searchValue, dtxsid, preferredName), # by = c("CAS #" = "searchValue") # ) # # # any left unidentified: search by name # lombardo_missing <- lombardo %>% # dplyr::filter(is.na(dtxsid)) # # missing_names <- lombardo_missing %>% # dplyr::pull(Name) # # lombardo_dtxsid_name <- ctxR::chemical_equal_batch( # word_list = unique(missing_names) # ) # # lombardo_missing <- lombardo_missing %>% # dplyr::left_join( # lombardo_dtxsid_name$valid %>% # dplyr::select(searchValue, dtxsid, preferredName), # by = c("Name" = "searchValue") # ) # # lombardo <- dplyr::bind_rows( # lombardo %>% filter(!is.na(dtxsid)), # lombardo_missing # ) # # lombard_abbr <- lombardo %>% # dplyr::select(dtxsid, # preferredName, # Vss = `human VDss (L/kg)`, # CLtot = `human CL (mL/min/kg)`, # halflife = `terminal t1/2 (h)` # ) %>% # dplyr::mutate( # Species = "human", # source = "Lombardo et al.", # CLtot = CLtot * 60 / 1000 # Lombardo reports this as mL/min/kg, need L/h/kg # ) %>% # dplyr::arrange(halflife) # # my_pk_vals <- my_tkstats %>% # dplyr::select( # dtxsid = Chemical, # Species, # model, # halflife = halflife.tkstats, # Vss = Vss.tkstats, # CLtot = CLtot.tkstats # ) %>% # dplyr::distinct() %>% # dplyr::mutate(source = "invivoPKfit") %>% # dplyr::filter( # model != "model_flat", # !is.na(halflife) # ) # # lombardo_inner <- my_pk_vals %>% # dplyr::inner_join(lombard_abbr, by = "dtxsid") # # 79 chemicals in common # # lombardo_comparison <- dplyr::bind_rows( # lombard_abbr %>% # dplyr::filter( # dtxsid %in% # lombardo_inner$dtxsid # ), # my_pk_vals %>% # dplyr::filter( # dtxsid %in% # lombardo_inner$dtxsid # ) # ) # # # merge in chemical names # # # get preferred names by DTXSID # all_details <- ctxR::get_chemical_details_batch( # unique(lombardo_comparison$dtxsid) # ) # # lombardo_comparison <- lombardo_comparison %>% # select(-preferredName) %>% # left_join(all_details %>% select(dtxsid, preferredName)) # # # create a factor variable for chemical names that sorts chemicals from highest to lowest Lombardo halflife # # chemnames_levels <- lombardo_comparison %>% # dplyr::filter(source %in% "Lombardo et al.") %>% # dplyr::arrange(halflife) %>% # dplyr::pull(preferredName) # # # lombardo_comparison <- lombardo_comparison %>% # dplyr::mutate(preferredName = factor(preferredName, # levels = chemnames_levels # )) # # # get invivoPKfit species for plot-splitting purposes # ivpk_species <- lombardo_comparison %>% # dplyr::filter(source %in% "invivoPKfit") %>% # dplyr::distinct(dtxsid, Species) %>% # dplyr::rename(invivopkfit_species = Species) # # lombardo_comparison <- lombardo_comparison %>% # dplyr::left_join(ivpk_species, # by = "dtxsid" # ) # # # reshape longer and plot # lombardo_comparison %>% # tidyr::pivot_longer(cols = c("Vss", "CLtot", "halflife")) %>% # dplyr::mutate(name = factor(name, # levels = c("halflife", "Vss", "CLtot") # )) %>% # ggplot(mapping = aes( # x = value, # y = preferredName # )) + # geom_point( # aes( # shape = source, # color = source # ), # # position = position_dodge(0.7), # size = 5 / .pt, stroke = 2.5 / .pt # ) + # facet_grid( # cols = vars(name), # rows = vars(invivopkfit_species), # scales = "free", # space = "free_y", # drop = TRUE # ) + # scale_x_log10(labels = scales::label_math(format = log10)) + # # annotation_logticks(sides = "b") + # scale_shape_manual(values = c( # "Lombardo et al." = 3, # "invivoPKfit" = 22 # )) + # scale_color_manual(values = c("#d95f02", "#1b9e77")) + # guides(x = "axis_logticks") + # theme( # panel.border = element_rect( # color = "black", # fill = NA, # linewidth = 1.5 # ), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # panel.spacing.y = unit(0, "line"), # strip.background = element_rect(fill = "white"), # strip.text.x = element_text(face = "bold", size = 12), # strip.text.y = element_blank(), # text = element_text(size = 10), # axis.ticks = element_blank(), # axis.line = element_blank(), # axis.text.y = element_text(face = "bold", size = 6), # axis.text.x = element_text(size = 10), # axis.title.y = element_blank(), # axis.title.x = element_blank(), # legend.position = "bottom", # legend.key = element_blank(), # legend.title = element_text(face = "bold"), # legend.text = element_text() # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig6.png" # ), # height = 11, # width = 10, # units = "in", # device = "png", # dpi = 300 # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig6.pdf" # ), # height = 11, # width = 10, # units = "in" # ) ## ----musther-read------------------------------------------------------------- # species_cols <- c( # paste("Mouse", # c( # "Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "F% Range" # ), # sep = "." # ), # paste("Rat", # c( # "Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "WSD", # "F% Range" # ), # sep = "." # ), # paste("Dog", # c( # "Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "WSD", # "F% Range" # ), # sep = "." # ), # paste("Non-Human Primate", # c( # "Dosing Formulation", # "Strain", # "Sex", # "F% Mean", # "WSD", # "F% Range" # ), # sep = "." # ), # paste("Human", # c( # "Dosing Formulation", # "Sex", # "F% Mean", # "WSD", # "F% Range" # ), # sep = "." # ) # ) # # # # musther2014 <- readxl::read_excel( # path = paste0( # Sys.getenv("OD_DIR"), # "SupTableMusther2014.xlsx" # ), # sheet = "Sheet1", # skip = 1, # col_names = c( # "Compound", # "CAS", # "MW", "Ion Class", # species_cols, # "References.mouse", # "References.rat", # "References.dog", # "References.NHP", # "References.human" # ), # col_types = "text" # ) # # # pivot longer # musther2014_long <- musther2014 %>% # select(c( # Compound, # CAS, # contains("F% Mean") # )) %>% # pivot_longer( # cols = !c(Compound, CAS), # names_to = c("Species", "stat"), # names_sep = "\\." # ) %>% # mutate( # value = gsub( # x = value, # pattern = "<", # replacement = "" # ), # value_num = as.numeric(value) # ) ## ----musther-ccd-------------------------------------------------------------- # musther2014_dtxsid <- ctxR::chemical_equal_batch(word_list = unique(musther2014_long$CAS)) # # musther2014_long <- musther2014_long %>% # left_join(musther2014_dtxsid$valid %>% select(searchValue, dtxsid, preferredName), # by = c("CAS" = "searchValue") # ) ## ----musther-getfgutabs------------------------------------------------------- # fgutabs <- coef(my_pk, method = "bobyqa") %>% # distinct() %>% # semi_join(winmodel) %>% # keep only winning models # rowwise() %>% # mutate(Fgutabs = coefs_vector["Fgutabs"]) %>% # filter(!is.na(Fgutabs)) ## ----------------------------------------------------------------------------- # length(intersect( # musther2014_long$dtxsid, # get_data(my_pk)$Chemical # )) ## ----musther-join------------------------------------------------------------- # fgutabs_pk_musther <- fgutabs %>% inner_join(musther2014_long, # by = c("Chemical" = "dtxsid") # ) ## ----musther-plot------------------------------------------------------------- # # capitalize invivopkfit species to harmonize with Musther # fgutabs_pk_musther <- fgutabs_pk_musther %>% # dplyr::mutate(Species.x = stringr::str_to_sentence(Species.x)) # # ggplot(fgutabs_pk_musther) + # geom_point( # aes( # y = preferredName, # x = Fgutabs, # shape = Species.x, # color = "invivopkfit" # ), # size = 4, # stroke = 2 # ) + # geom_point( # aes( # y = preferredName, # x = value_num / 100, # shape = Species.y, # color = "Musther et al. 2014" # ), # size = 4, # stroke = 2 # ) + # scale_shape_manual( # values = c( # "Rat" = 21, # "Mouse" = 22, # "Dog" = 23, # "Non-Human Primate" = 24, # "Human" = 3 # ), # breaks = c("Rat", "Mouse", "Dog", "Non-Human Primate", "Human") # ) + # theme_bw() + # theme( # axis.title.y = element_blank(), # legend.title = element_blank(), # axis.title.x = element_text(size = 12), # axis.text = element_text(size = 12), # legend.text = element_text(size = 12) # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig9.pdf" # ), # height = 5, # width = 8 # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig9.png" # ), # height = 5, # width = 8, # device = "png", dpi = 300 # ) ## ----do_fit-benchmarking------------------------------------------------------ # # Randomize the order of chemical species # chem_spec <- cvt %>% # dplyr::distinct(analyte_dtxsid, species) %>% # dplyr::slice_sample(prop = 1) # # test_group_size <- seq(25, 105, by = 10) # fit_options <- expand.grid( # error_model = c( # "pooled", # "hierarchical" # ), # dose_norm = FALSE, # log10_trans = TRUE, # time_scale = c( # TRUE, # FALSE # ), # stringsAsFactors = FALSE # ) # # fit_options <- tidyr::expand_grid(fit_options, test_group_size) # # fit_options <- fit_options %>% # rowwise() %>% # mutate( # this_data = list({ # tmp <- chem_spec %>% # slice_head(n = test_group_size) %>% # left_join(cvt, by = join_by(analyte_dtxsid, species)) %>% # pk() + # scale_time(new_units = ifelse(!time_scale, # "identity", # "auto" # )) + # scale_conc(dose_norm = dose_norm, log10_trans = log10_trans) + # settings_preprocess(suppress.messages = TRUE) # do_prefit(tmp) # }), # data_size = chem_spec %>% slice_head(n = test_group_size) %>% # left_join(cvt, by = join_by(analyte_dtxsid, species)) %>% # NROW() # ) # # # Organization of the benchmarking # # For each fit_option, return the four values of system.time() that we want # df_out <- fit_options %>% # rowwise() %>% # mutate( # tim_1core = system.time(do_fit(this_data))[["elapsed"]], # tim_4core = system.time(do_fit(this_data, n_cores = 4))[["elapsed"]], # tim_7core = system.time(do_fit(this_data, n_cores = 7))[["elapsed"]], # tim_12core = system.time(do_fit(this_data, n_cores = 12))[["elapsed"]] # ) # # df_out <- df_out %>% select(-this_data) # gc() # # full_long <- df_out %>% # pivot_longer( # cols = c(tim_1core:tim_12core), # names_to = "N_cores", # values_to = "Time_s" # ) %>% # group_by(N_cores, test_group_size, data_size) %>% # summarize( # mean_time = mean(Time_s, na.rm = TRUE) / 60, # max_time = max(Time_s, na.rm = TRUE) / 60, # min_time = min(Time_s, na.rm = TRUE) / 60 # ) %>% # mutate(N_cores = as.numeric(str_extract(N_cores, "[:digit:]+"))) # # df_out %>% clipr::write_clip() # ggplot( # full_long, # aes( # x = test_group_size, # y = mean_time, # color = as.factor(N_cores) # ) # ) + # geom_point() + # geom_linerange(aes( # ymin = min_time, # ymax = max_time # )) + # geom_line(aes(group = N_cores)) + # labs( # x = "Number of Data Groups", # y = "Runtime (minutes)", # title = "Number of Processing Cores", # subtitle = "(with min/max runtime range)" # ) + # facet_grid(cols = vars(N_cores)) + # scale_color_manual(values = c("black", "#40c679", "#31a354", "#006837")) + # coord_fixed(ratio = 8) + # theme( # panel.border = element_rect(color = "black", fill = NA, size = 1.5), # panel.background = element_blank(), # panel.grid.major = element_line(color = "grey90", linewidth = 0.5), # legend.position = "none", # plot.title = element_text(hjust = 0.5), # plot.subtitle = element_text(hjust = 0.5), # axis.ticks = element_blank(), # axis.line = element_blank(), # strip.background = element_blank() # ) # # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Fig10.png" # ), # width = 5, # height = 2.5, # units = "in" # ) ## ----fit_summary_bydatagrp---------------------------------------------------- # fit_summary_bydatagrp <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # my_pk <- readRDS(file = file_str) # # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Summary by data group for: ", pk_name)) # # myAFE <- AFE(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE # ) # # myAAFE <- AAFE(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE # ) # # my_r2 <- rsq(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE # ) # # myAIC <- AIC(my_pk) # # myRMSLE <- rmse(my_pk, # use_scale_conc = list( # dose_norm = FALSE, # log10_trans = TRUE # ), # sub_pLOQ = TRUE # ) # # # myRMSE <- rmse(my_pk, # use_scale_conc = FALSE, # sub_pLOQ = TRUE # ) # # winmodel <- get_winning_model(my_pk) %>% # select(Chemical, Species, method, model) %>% # dplyr::mutate(is_winning_model = TRUE) # # # # note convergence codes for each model # pf <- my_pk$fit %>% # dplyr::distinct(Chemical, Species, model, method, convcode) # # # convcode_labels <- data.frame( # convcode = c( # 0, # 1, # 20, # 21, # 10, # 51, # 52, # 9999, # -9999 # ), # convcode_label = c( # "Successful convergence (see ?optimx::optimx)", # "The iteration limit maxit had been reached (see ?optimx::optimx)", # "the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))", # "an intermediate set of parameters is inadmissible (see ?optimx::optimx)", # "degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)", # 'a warning from the "L-BFGS-B" method (see ?optimx::optimx)', # 'an error from the "L-BFGS-B" method (see ?optimx::optimx)', # "error in optimx::optimx()", # 'model status was "abort"' # ) # ) # # pf <- pf %>% # dplyr::left_join(convcode_labels) # # # join absolutely everything together # summ_by_datagroup <- pf %>% # left_join(winmodel) %>% # left_join(myAIC) %>% # left_join(myAFE) %>% # left_join(myAAFE) %>% # left_join(my_r2) %>% # left_join(myRMSLE) %>% # left_join(myRMSE) %>% # mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE, # is.na(is_winning_model) ~ FALSE, # .default = FALSE # )) %>% # arrange(Chemical, Species, method, model) # # return(summ_by_datagroup) # } ## ----------------------------------------------------------------------------- # fit_summary_byexpt <- function(this_error_model, # this_dose_norm, # this_log10_trans, # this_time_scale) { # dose_indic <- as.numeric(this_dose_norm) # log10_indic <- as.numeric(this_log10_trans) # time_indic <- as.numeric(this_time_scale) # errmodel_indic <- substr(this_error_model, 1, 1) # # file_str <- paste0( # Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_", # dose_indic, # log10_indic, # time_indic, # errmodel_indic, # ".Rds" # ) # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # my_pk <- readRDS(file = file_str) # # pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic) # # message(paste0("Summary by experiment for: ", pk_name)) # # my_tkstats <- eval_tkstats(my_pk, # model = NULL # ) # # winmodel <- get_winning_model(my_pk) %>% # select(Chemical, Species, method, model) %>% # dplyr::mutate(is_winning_model = TRUE) # # pf <- my_pk$fit %>% # dplyr::distinct(Chemical, Species, model, method, convcode) # # # convcode_labels <- data.frame( # convcode = c( # 0, # 1, # 20, # 21, # 10, # 51, # 52, # 9999, # -9999 # ), # convcode_label = c( # "Successful convergence (see ?optimx::optimx)", # "The iteration limit maxit had been reached (see ?optimx::optimx)", # "the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))", # "an intermediate set of parameters is inadmissible (see ?optimx::optimx)", # "degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)", # 'a warning from the "L-BFGS-B" method (see ?optimx::optimx)', # 'an error from the "L-BFGS-B" method (see ?optimx::optimx)', # "error in optimx::optimx()", # 'model status was "abort"' # ) # ) # # pf <- pf %>% # dplyr::left_join(convcode_labels) # # # summ_by_expt <- pf %>% # left_join(winmodel) %>% # left_join(my_tkstats) %>% # mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE, # is.na(is_winning_model) ~ FALSE, # .default = FALSE # )) %>% # arrange(Chemical, Species, method, model) # # return(summ_by_expt) # } ## ----------------------------------------------------------------------------- # summary_datagrp_all <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(summ = list( # fit_summary_bydatagrp( # this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ) # )) %>% # tidyr::unnest(cols = c(summ)) # # summary_expt_all <- fitopts %>% # dplyr::rowwise() %>% # dplyr::mutate(summ_expt = list( # fit_summary_byexpt( # this_error_model = error_model, # this_dose_norm = dose_norm, # this_log10_trans = log10_trans, # this_time_scale = time_scale # ) # )) %>% # tidyr::unnest(cols = c(summ_expt)) # # writexl::write_xlsx( # list( # "Summary by data grp" = summary_datagrp_all, # "Summary by expt" = summary_expt_all # ), # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Supp_Table4.xlsx" # ) # ) ## ----------------------------------------------------------------------------- # # set up pk analysis for single-study # pk_study <- minimal_pk + # facet_data( # facets = vars(Chemical, Species, Reference) # ) + # stat_error_model( # error_group = vars(Chemical, Species, Reference) # ) + # settings_preprocess( # keep_data_original = FALSE, # suppress.messages = TRUE # ) + # scale_conc( # dose_norm = TRUE, # log10_trans = TRUE # ) + # settings_optimx(method = "bobyqa") + # stat_model(model = c( # "model_flat", # "model_1comp", # "model_2comp" # )) # # # do pk analysis # system.time( # pk_study <- do_fit(pk_study, # n_cores = 11 # ) # ) ## ----------------------------------------------------------------------------- # saveRDS( # pk_study, # paste0( # Sys.getenv("OD_DIR"), # today, # "studylevel_pk_fit_110p.Rds" # ) # ) ## ----------------------------------------------------------------------------- # tkstats_study <- eval_tkstats(pk_study, # finite_only = FALSE, # dose_norm = TRUE, # model = "winning" # ) %>% # dplyr::select( # Chemical, # Species, # Reference, # method, # model, # halflife.tkstats, # Vss.tkstats, # `Vss/Fgutabs.tkstats` # ) %>% # dplyr::rename( # halflife = halflife.tkstats, # Vss = Vss.tkstats, # `Vss/Fgutabs` = `Vss/Fgutabs.tkstats` # ) %>% # dplyr::distinct() # # # Also: get coefficients and try to pull Fgutabs (if it was optimized) # coef_study <- coef_sd(pk_study, # method = "bobyqa", # include_type = "optimize" # ) %>% # dplyr::filter(param_name %in% "Fgutabs") %>% # dplyr::select( # Chemical, # Species, # Reference, # method, # model, # param_name, # param_value # ) %>% # dplyr::distinct() # # # keep only winning model coefs # win_study <- get_winning_model(pk_study) %>% # dplyr::select(-near_flat, -preds_below_loq) # # coef_study <- coef_study %>% # dplyr::semi_join(win_study) # # fgutabs_study <- coef_study %>% # select(-param_name) %>% # rename(Fgutabs = param_value) # # tkstats_study2 <- full_join( # tkstats_study, # fgutabs_study # ) ## ----------------------------------------------------------------------------- # file_str <- paste0( # Sys.getenv("OD_DIR"), # read_date_pk, # "mypk_fit_110p.Rds" # ) # pk_pooled <- readRDS(file = file_str) # # tkstats_pooled <- eval_tkstats(pk_pooled, # finite_only = FALSE, # dose_norm = TRUE, # method = "bobyqa", # model = "winning" # ) %>% # dplyr::select( # Chemical, # Species, # Reference, # method, # model, # halflife.tkstats, # Vss.tkstats, # `Vss/Fgutabs.tkstats` # ) %>% # dplyr::rename( # halflife = halflife.tkstats, # Vss = Vss.tkstats, # `Vss/Fgutabs` = `Vss/Fgutabs.tkstats` # ) %>% # dplyr::group_by( # Chemical, # Species, # method, # model, # halflife, # Vss, # `Vss/Fgutabs` # ) %>% # dplyr::summarise(n_ref = n_distinct(Reference)) %>% # dplyr::ungroup() # # # Also: get coefficients and try to pull Fgutabs # coef_pooled <- coef_sd(pk_pooled, # method = "bobyqa", # include_type = "optimize" # ) %>% # dplyr::filter(param_name %in% "Fgutabs") %>% # dplyr::select( # Chemical, # Species, # method, # model, # param_name, # param_value # ) %>% # dplyr::distinct() # # # keep only winning model coefs # win_pooled <- get_winning_model(pk_pooled, # method = "bobyqa" # ) %>% # dplyr::select(-near_flat, -preds_below_loq) # # coef_pooled <- coef_pooled %>% # dplyr::semi_join(win_pooled) # # fgutabs_pooled <- coef_pooled %>% # select(-param_name) %>% # rename(Fgutabs = param_value) # # tkstats_pooled2 <- full_join( # tkstats_pooled, # fgutabs_pooled # ) ## ----------------------------------------------------------------------------- # tkstats_study3 <- tkstats_study2 %>% # dplyr::rename(model_study = model) %>% # pivot_longer( # cols = c( # halflife, # Vss, # `Vss/Fgutabs`, # Fgutabs # ), # names_to = "name", # values_to = "value_study" # ) # # tkstats_pooled3 <- tkstats_pooled2 %>% # dplyr::rename( # model_pooled = model, # n_ref_pooled = n_ref # ) %>% # pivot_longer( # cols = c( # halflife, # Vss, # `Vss/Fgutabs`, # Fgutabs # ), # names_to = "name", # values_to = "value_pooled" # ) # # tkstats_all <- full_join( # tkstats_study3, # tkstats_pooled3 # ) ## ----------------------------------------------------------------------------- # tkstats_all <- tkstats_all %>% # filter(!(model_pooled %in% "model_flat") & # !(model_study %in% "model_flat")) ## ----------------------------------------------------------------------------- # ggplot(tkstats_all %>% # filter(!is.na(value_study) & # !is.na(value_pooled))) + # geom_point(aes( # x = value_pooled, # y = value_study, # color = factor(n_ref_pooled) # )) + # facet_wrap( # facets = vars(name), # scales = "free" # ) + # scale_x_log10(guide = "axis_logticks") + # scale_y_log10(guide = "axis_logticks") ## ----------------------------------------------------------------------------- # ggplot(tkstats_all %>% # filter(n_ref_pooled > 1 & # !is.na(value_study) & # !is.na(value_pooled))) + # geom_point( # aes( # x = value_pooled, # y = value_study # ), # color = "gray50", # shape = 1 # ) + # geom_line( # aes( # x = value_pooled, # y = value_study, # group = interaction(Chemical, Species) # ), # color = "gray50" # ) + # facet_wrap( # facets = vars(name), # scales = "free" # ) + # geom_abline( # intercept = 0, slope = 1, # color = "black" # ) + # geom_smooth( # aes( # x = value_pooled, # y = value_study # ), # method = "lm", # se = FALSE, # color = "black", # linetype = 2 # ) + # scale_x_log10(guide = "axis_logticks") + # scale_y_log10(guide = "axis_logticks") + # theme_bw() + # theme(strip.background = element_blank()) ## ----------------------------------------------------------------------------- # rsq_df <- tkstats_all %>% # filter(n_ref_pooled > 1 & # !is.na(value_study) & # !is.na(value_pooled)) %>% # mutate( # log10_value_pooled = log10(value_pooled), # log10_value_study = log10(value_study) # ) %>% # group_by(name) %>% # summarise( # n = n(), # n_grp = n_distinct(Chemical, Species), # rsq_log10 = summary( # lm( # log10_value_study ~ log10_value_pooled, # na.action = "na.omit" # ) # )[[ # "r.squared" # ]], # rsq = summary(lm(value_study ~ value_pooled))[[ # "r.squared" # ]] # ) %>% # ungroup() %>% # mutate(label = sprintf( # "R^2 == %.3f", # rsq_log10 # )) # # rsq_df %>% # select(-label) %>% # print() ## ----------------------------------------------------------------------------- # ggplot(tkstats_all %>% # filter(n_ref_pooled > 1 & # !is.na(value_study) & # !is.na(value_pooled))) + # geom_point( # aes( # x = value_pooled, # y = value_study # ), # color = "gray50", # shape = 1 # ) + # geom_line( # aes( # x = value_pooled, # y = value_study, # group = interaction(Chemical, Species) # ), # color = "gray50" # ) + # geom_text( # data = rsq_df, # aes( # x = 0, # y = Inf, # label = label # ), # parse = TRUE, # hjust = -0.3, # vjust = 1.8 # ) + # facet_wrap( # facets = vars(name), # scales = "free" # ) + # geom_abline( # intercept = 0, slope = 1, # color = "black" # ) + # geom_smooth( # aes( # x = value_pooled, # y = value_study # ), # method = "lm", # se = FALSE, # color = "black", # linetype = 2 # ) + # scale_x_log10(guide = "axis_logticks") + # scale_y_log10(guide = "axis_logticks") + # xlab("Pooled value") + # ylab("Single-study value") + # theme_bw() + # theme( # strip.background = element_blank(), # strip.text = element_text(size = 12) # ) # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_Fig8.png" # ), # height = 5, # width = 7 # ) # # ggsave( # paste0( # Sys.getenv("FIG_DIR"), # today, # "Manu_Files/", # "_Fig8.pdf" # ), # height = 5, # width = 7 # ) ## ----------------------------------------------------------------------------- # tkstats_all_wide <- tkstats_all %>% # pivot_wider( # id_cols = c( # Chemical, Species, # Reference, method, # model_study, # model_pooled, # n_ref_pooled # ), # names_from = name, # values_from = c(value_study, value_pooled) # ) # # tkstats_all_wide %>% # filter(n_ref_pooled > 1) %>% # writexl::write_xlsx(path = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "Supp_Table5.xlsx" # )) ## ----------------------------------------------------------------------------- # plot_pooled <- plot(pk_pooled, # method = "bobyqa", # use_scale_conc = TRUE, # best_fit = TRUE # ) # plot_pooled %>% # filter(Chemical %in% "DTXSID2020139" & Species %in% "rat") %>% # pull(final_plot[[1]]) ## ----------------------------------------------------------------------------- # plot_pooled <- plot(pk_pooled, # method = "bobyqa", # use_scale_conc = TRUE, # print_out = TRUE # ) # # ggsave( # filename = paste0( # Sys.getenv("FIG_DIR"), # "Manu_Files/", # today, # "_allplots.pdf" # ), # plot = gridExtra::marrangeGrob(plot_pooled, nrow = 1, ncol = 1), # width = 15, height = 9 # ) ## ----information-------------------------------------------------------------- # sessionInfo()