## ----setup, include = FALSE---------------------------------------------------
# Chunk options for the rendered document, plus the packages the hidden setup
# chunk attaches before any visible code runs.
knitr::opts_chunk$set(echo = TRUE)
library(BiocStyle)
library(GO.db)
library(KEGGREST)
library(org.Hs.eg.db)
library(SomaScan.db)
library(withr)


## ----load-pkgs, warning = FALSE, message = FALSE------------------------------
# Same packages again, this time in a chunk that is shown to the reader.
library(GO.db)
library(KEGGREST)
library(org.Hs.eg.db)
library(SomaScan.db)
library(withr)


## ----il31-select-df-----------------------------------------------------------
# Query SomaScan.db by gene symbol "IL31", requesting the PROBEID and GO
# columns, then print the result.
il31_go <- select(
    SomaScan.db,
    keys = "IL31",
    keytype = "SYMBOL",
    columns = c("PROBEID", "GO")
)
il31_go


## ----go-term------------------------------------------------------------------
# Term for each GO identifier returned above
Term(il31_go[["GO"]])


## ----go-definition------------------------------------------------------------
# Definition for each GO identifier returned above
Definition(il31_go[["GO"]])


## ----go-synonym---------------------------------------------------------------
# Synonym(s) for each GO identifier returned above
Synonym(il31_go[["GO"]])


## ----append-terms-------------------------------------------------------------
trms <- Term(il31_go[["GO"]])
class(trms)

# One term per GO identifier is needed before it can be added as a column
length(il31_go[["GO"]]) == length(trms)

il31_go$TERM <- trms
il31_go


## ----append-definitions-------------------------------------------------------
defs <- Definition(il31_go[["GO"]])
class(defs)

# Same length check as for the terms
length(il31_go[["GO"]]) == length(defs)

il31_go$DEFINITION <- defs

# Show only the identifier columns together with the appended annotations
il31_go[, c("SYMBOL", "PROBEID", "GO", "TERM", "DEFINITION")]


## ----example-go-ids-----------------------------------------------------------
# GO identifiers for the "IL3RA" symbol; SYMBOL is requested as a column too
go_ids <- select(
    SomaScan.db,
    keys = "IL3RA",
    keytype = "SYMBOL",
    columns = c("GO", "SYMBOL")
)
go_ids


## ----go-columns---------------------------------------------------------------
# Columns that can be requested from GO.db
columns(GO.db)

# Query GO.db directly with the GO identifiers collected above
go_defs <- select(
    GO.db,
    keys = go_ids[["GO"]],
    columns = c("GOID", "TERM", "DEFINITION")
)
go_defs


## ----go-merge-----------------------------------------------------------------
# Join the two tables; the GO identifier is "GO" in one and "GOID" in the other
merge(x = go_ids, y = go_defs, by.x = "GO", by.y = "GOID")


## ----select-kegg--------------------------------------------------------------
# Query SomaScan.db by gene symbol "CD86", requesting PROBEID and PATH
kegg_sel <- select(
    SomaScan.db,
    keys = "CD86",
    keytype = "SYMBOL",
    columns = c("PROBEID", "PATH")
)
kegg_sel


## ----get-path-names-----------------------------------------------------------
# Add prefix indicating species (hsa = Homo sapiens)
hsa_names <- paste0("hsa", kegg_sel$PATH)

# keggGet() accepts at most 10 identifiers per request, so the identifiers are
# queried in batches of 10 and the per-batch result lists are concatenated.
# The combined list then has one entry per element of `hsa_names`, instead of
# assuming that exactly 10 pathways were found.
kegg_batches <- split(hsa_names, ceiling(seq_along(hsa_names) / 10L))
kegg_res <- lapply(kegg_batches, function(ids) keggGet(dbentries = ids))
kegg_res <- do.call(c, unname(kegg_res)) |>
    setNames(hsa_names) # Setting names for results list


## ----str-kegg-path------------------------------------------------------------
# Structure of a single entry of the results list
str(kegg_res$hsa04514)


## ----path-names-vector--------------------------------------------------------
# Pull the "NAME" element out of every entry; each is expected to be a single
# string, which vapply() enforces via `character(1L)`
kegg_names <- vapply(
    kegg_res,
    function(entry) entry[["NAME"]],
    character(1L),
    USE.NAMES = FALSE
)
kegg_names


## ----append-names-------------------------------------------------------------
kegg_sel$PATHNAME <- kegg_names
kegg_sel


## ----seqid-gene---------------------------------------------------------------
# Gene-level identifiers for a single key, "11138-16" (no keytype is given, so
# the database's default keytype is used)
pos_sel <- select(SomaScan.db, "11138-16",
                  columns = c("SYMBOL", "GENENAME", "ENTREZID", "ENSEMBL"))
pos_sel


## ----keys-ens75, eval = FALSE-------------------------------------------------
# # Install package from Bioconductor, if not already installed
# if (!require("EnsDb.Hsapiens.v75", quietly = TRUE)) {
#     BiocManager::install("EnsDb.Hsapiens.v75")
# }
#
# # The central keys of the organism-level database are the Ensembl gene ID
# keys(EnsDb.Hsapiens.v75)[1:10L]
#
# # Also contains the Ensembl gene ID, so this column can be used for merging
# grep("ENSEMBL", columns(SomaScan.db), value = TRUE)
#
# # These columns will inform us as to what positional information we can
# # retrieve from the organism-level database
# columns(EnsDb.Hsapiens.v75)
#
# # Build a query to retrieve the prot IDs and start/stop pos of protein domains
# pos_res <- select(EnsDb.Hsapiens.v75, keys = "ENSG00000020633",
#                   columns = c("GENEBIOTYPE", "SEQCOORDSYSTEM", "GENEID",
#                               "PROTEINID", "PROTDOMSTART", "PROTDOMEND"))
#
# # Merge back into `pos_sel` using the "GENEID" column
# merge(pos_sel, pos_res, by.x = "ENSEMBL", by.y = "GENEID")


## ----kin-act-go---------------------------------------------------------------
# Look up the GO identifier whose term is "cell adhesion"
select(GO.db, keys = "cell adhesion", keytype = "TERM",
       columns = c("GOID", "TERM"))


## -----------------------------------------------------------------------------
# Both requested columns must be passed as one vector to `columns`; a second
# bare string would be matched to a different argument rather than treated as
# a column. "UNIPROT" is the column name used by the other SomaScan.db queries
# in this script.
cellAd_ids <- select(SomaScan.db, keys = "GO:0007155", keytype = "GO",
                     columns = c("PROBEID", "UNIPROT"))

head(cellAd_ids, n = 10L)

# Total number of SeqIds associated with cell adhesion
unique(cellAd_ids$PROBEID) |> length()


## -----------------------------------------------------------------------------
# Same GO identifier, this time queried against org.Hs.eg.db
cellAd_prots <- select(org.Hs.eg.db,
                       keys = "GO:0007155",
                       keytype = "GO",
                       columns = "UNIPROT")

# Again, we take the unique set of proteins
length(unique(cellAd_prots$UNIPROT))


## -----------------------------------------------------------------------------
# Look up those UniProt IDs in SomaScan.db to see which ones have a SeqId
cellAd_covProts <- select(SomaScan.db, keys = unique(cellAd_prots$UNIPROT),
                          keytype = "UNIPROT", columns = "PROBEID")

head(cellAd_covProts, n = 20L)


## -----------------------------------------------------------------------------
# Keep only rows with a SeqId, then count the distinct proteins that remain
cellAd_covProts <- cellAd_covProts[!is.na(cellAd_covProts$PROBEID), ]

cellAd_covIDs <- unique(cellAd_covProts$UNIPROT)

length(cellAd_covIDs)


## ----kin-act-menu-diff--------------------------------------------------------
# Repeat the coverage query once per menu ("5k" and "7k") and compare results
cellAd_menu <- lapply(c("5k", "7k"), function(x) {
    df <- select(SomaScan.db, keys = unique(cellAd_prots$UNIPROT),
                 keytype = "UNIPROT", columns = "PROBEID", menu = x)

    # Drop rows without a SeqId (PROBEID is NA) and return the remaining rows
    # explicitly
    df[!is.na(df$PROBEID), ]
}) |> setNames(c("somascan_5k", "somascan_7k"))

identical(cellAd_menu$somascan_5k, cellAd_menu$somascan_7k)


## ----keys-il17----------------------------------------------------------------
# All SYMBOL keys matching the pattern "IL17"
il17_family <- keys(SomaScan.db, keytype = "SYMBOL", pattern = "IL17")


## ----select-il17--------------------------------------------------------------
select(SomaScan.db, keys = il17_family, keytype = "SYMBOL",
       columns = c("PROBEID", "UNIPROT", "GENENAME"))


## ----combine-keys-select------------------------------------------------------
# With `match = TRUE`, the key "NOTCH|ZF" is passed as a pattern
select(SomaScan.db, keys = "NOTCH|ZF", keytype = "SYMBOL",
       columns = c("PROBEID", "SYMBOL", "GENENAME"), match = TRUE)


## ----homeobox-----------------------------------------------------------------
# Same `match = TRUE` lookup, this time against the GENENAME keytype
select(SomaScan.db, keys = "homeobox", keytype = "GENENAME",
       columns = c("PROBEID", "SYMBOL"), match = TRUE)


## ----session-info-------------------------------------------------------------
sessionInfo()