## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- options(width=100) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE---------------------------------------------------------------------------- suppressPackageStartupMessages({ library(AnnotationHub) library(airway) library(org.Hs.eg.db) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(RNAseqData.HNRNPC.bam.chr14) library(GenomicAlignments) library(shiny) library(BiocParallel) library(microbenchmark) }) ## ------------------------------------------------------------------------------------------------- library(org.Hs.eg.db) org <- org.Hs.eg.db select(org, "BRCA1", c("ENSEMBL", "GENENAME"), "SYMBOL") ## ------------------------------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene promoters(txdb) ## ------------------------------------------------------------------------------------------------- library(AnnotationHub) hub = AnnotationHub() hub query(hub, c("ensembl", "81.gtf")) hub[["AH48004"]] ## ------------------------------------------------------------------------------------------------- library(org.Hs.eg.db) ## ------------------------------------------------------------------------------------------------- select(org.Hs.eg.db, "HNRNPC", c("ENTREZID", "GENENAME"), "SYMBOL") sym2eg <- mapIds(org.Hs.eg.db, "HNRNPC", "ENTREZID", "SYMBOL") ## ------------------------------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene ## ------------------------------------------------------------------------------------------------- gns <- genes(txdb) exonsBy(txdb, "gene")[sym2eg] ## ------------------------------------------------------------------------------------------------- library(RNAseqData.HNRNPC.bam.chr14) fls = RNAseqData.HNRNPC.bam.chr14_BAMFILES basename(fls) ## ------------------------------------------------------------------------------------------------- library(GenomicAlignments) bfls = BamFileList(fls) bfl = bfls[[1]] ## ------------------------------------------------------------------------------------------------- library(Rsamtools) param <- ScanBamParam(which=gns[sym2eg]) readGAlignments(bfl, param=param) ## ------------------------------------------------------------------------------------------------- library(airway) data(airway) airway ## ------------------------------------------------------------------------------------------------- symid <- mapIds(org.Hs.eg.db, rownames(airway), "SYMBOL", "ENSEMBL") ## ------------------------------------------------------------------------------------------------- mcols(rowRanges(airway))$symid <- symid ## ------------------------------------------------------------------------------------------------- library(AnnotationHub) hub <- AnnotationHub() query(hub, c("epigenome", "metadata")) meta <- hub[["AH41830"]] ## ------------------------------------------------------------------------------------------------- table(meta$ANATOMY) meta[meta$ANATOMY == "LIVER",] ## ------------------------------------------------------------------------------------------------- query(hub, c("E118", "mnemonic")) E118 <- hub[["AH46971"]] E118 ## ------------------------------------------------------------------------------------------------- table(E118$name) E118[E118$name %in% "Heterochromatin"] ## ----biomart, eval=FALSE-------------------------------------------------------------------------- # library(biomaRt) # head(listMarts(), 3) ## list marts # head(listDatasets(useMart("ensembl")), 3) ## mart datasets # ensembl <- ## fully specified mart # useMart("ensembl", dataset = "hsapiens_gene_ensembl") # # head(listFilters(ensembl), 3) ## filters # myFilter <- "chromosome_name" # substr(filterOptions(myFilter, ensembl), 1, 50) ## return values # myValues <- c("21", "22") # head(listAttributes(ensembl), 3) ## attributes # myAttributes <- c("ensembl_gene_id","chromosome_name") # # ## assemble and query the mart # res <- getBM(attributes = myAttributes, filters = myFilter, # values = myValues, mart = ensembl) ## ----vectorize------------------------------------------------------------------------------------ x <- 1:10 log(x) ## NOT for (i in seq_along(x)) x[i] <- log(x[i]) ## ----pre-allocate--------------------------------------------------------------------------------- result <- numeric(10) result[1] <- runif(1) for (i in 2:length(result)) result[i] <- runif(1) * result[i - 1] result ## ----inefficient---------------------------------------------------------------------------------- f0 <- function(n, a=2) { ## stopifnot(is.integer(n) && (length(n) == 1) && ## !is.na(n) && (n > 0)) result <- numeric() for (i in seq_len(n)) result[[i]] <- a * log(i) result } ## ----system-time---------------------------------------------------------------------------------- system.time(f0(10000)) n <- 1000 * seq(1, 20, 2) t <- sapply(n, function(i) system.time(f0(i))[[3]]) plot(t ~ n, type="b") ## ----correct-init--------------------------------------------------------------------------------- n <- 10000 system.time(expected <- f0(n)) head(expected) ## ----hoist---------------------------------------------------------------------------------------- f1 <- function(n, a=2) { result <- numeric() for (i in seq_len(n)) result[[i]] <- log(i) a * result } identical(expected, f1(n)) library(microbenchmark) microbenchmark(f0(n), f1(n), times=5) ## ----preallocate-and-fill------------------------------------------------------------------------- f2 <- function(n, a=2) { result <- numeric(n) for (i in seq_len(n)) result[[i]] <- log(i) a * result } identical(expected, f2(n)) microbenchmark(f0(n), f2(n), times=5) ## ----use-apply------------------------------------------------------------------------------------ f3 <- function(n, a=2) a * sapply(seq_len(n), log) identical(expected, f3(n)) microbenchmark(f0(n), f2(n), f3(n), times=10) ## ----use-vectorize-------------------------------------------------------------------------------- f4 <- function(n, a=2) a * log(seq_len(n)) identical(expected, f4(n)) microbenchmark(f0(n), f3(n), f4(n), times=10) ## ----vectorized-scale, eval=FALSE----------------------------------------------------------------- # n <- 10 ^ (5:8) # 100x larger than f0 # t <- sapply(n, function(i) system.time(f4(i))[[3]]) # plot(t ~ n, log="xy", type="b") ## ----parallel-sleep------------------------------------------------------------------------------- library(BiocParallel) fun <- function(i) { Sys.sleep(1) i } ## serial f0 <- function(n) lapply(seq_len(n), fun) ## parallel f1 <- function(n) bplapply(seq_len(n), fun)