## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, crop = NULL) library(BiocStyle) library(dplyr) library(GO.db) library(SomaDataIO) library(SomaScan.db) ## ----load-pkgs---------------------------------------------------------------- library(dplyr) library(GO.db) library(SomaDataIO) library(SomaScan.db) ## ----read-adat---------------------------------------------------------------- # Retrieve the example ADAT file from the SomaDataIO package file <- system.file("extdata", "example_data10.adat", package = "SomaDataIO", mustWork = TRUE ) # Read in the file adat <- SomaDataIO::read_adat(file) adat ## ----age-summary-------------------------------------------------------------- summary(adat$Age) ## ----age-hist, fig.height=3.5, fig.width=5------------------------------------ hist(adat$Age, xlab = "Age (years)", main = "Age Distribution" ) ## ----rm-na-------------------------------------------------------------------- dplyr::filter(adat, is.na(Sex)) %>% dplyr::select(SlideId, Age) ## ----------------------------------------------------------------------------- adat <- dplyr::filter(adat, !is.na(Age)) ## ----n-analytes--------------------------------------------------------------- analytes <- SomaDataIO::getAnalytes(adat) head(analytes) ## ----------------------------------------------------------------------------- length(analytes) ## ----log-trans---------------------------------------------------------------- log10_adat <- log10(adat) ## ----cor-test----------------------------------------------------------------- # Calculate correlations for each analyte cors_df <- lapply(analytes, function(x) { results <- stats::cor.test(log10_adat$Age, log10_adat[[x]], method = "spearman", exact = FALSE) results <- results[c("estimate", "p.value")] unlist(results) }) %>% setNames(analytes) # Bind results together into a single dataframe cors_df <- dplyr::bind_rows(cors_df, .id = "analytes") # Isolate top positive correlations top_posCor <- cors_df %>% dplyr::filter(p.value < 0.05) %>% # Retain significant cors only dplyr::filter(estimate.rho >= 0.75) %>% # Retain strong correlations arrange(desc(estimate.rho)) nrow(top_posCor) head(top_posCor, 20L) ## ----columns------------------------------------------------------------------ columns(SomaScan.db) ## ----GO-columns--------------------------------------------------------------- grep("GO|ONTOLOGY", columns(SomaScan.db), value = TRUE) ## ----top-10------------------------------------------------------------------- top10_analytes <- head(top_posCor$analytes, 10L) anno <- SomaScan.db::select(SomaScan.db, keys = top10_analytes, columns = c("SYMBOL", "GENENAME", "GENETYPE")) anno ## ----------------------------------------------------------------------------- go_anno <- SomaScan.db::select(SomaScan.db, keys = anno$PROBEID[1:3L], columns = c("SYMBOL", "GENENAME", "GENETYPE", "GO", "ONTOLOGY")) %>% dplyr::filter(ONTOLOGY == "BP") go_anno ## ----------------------------------------------------------------------------- go_terms <- AnnotationDbi::select(GO.db, keys = go_anno$GO, columns = c("GOID", "TERM", "DEFINITION")) go_terms ## ----join-go------------------------------------------------------------------ final_df <- left_join(go_anno, go_terms, by = c("GO" = "GOID")) final_df ## ----session-info------------------------------------------------------------- sessionInfo()