## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- options(width=100) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE---------------------------------------------------------------------------- suppressPackageStartupMessages({ library(SummarizedExperiment) library(GenomicAlignments) library(AnnotationHub) library(biomaRt) library(airway) library(org.Hs.eg.db) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(RNAseqData.HNRNPC.bam.chr14) }) ## ----ranges, message=FALSE------------------------------------------------------------------------ library(GenomicRanges) gr <- GRanges("A", IRanges(c(10, 20, 22), width=5), "+") shift(gr, 1) # intra-range range(gr) # inter-range reduce(gr) # inter-range snps <- GRanges("A", IRanges(c(11, 17, 24), width=1)) findOverlaps(snps, gr) # between-range setdiff(range(gr), gr) # 'introns' ## ------------------------------------------------------------------------------------------------- library(org.Hs.eg.db) org <- org.Hs.eg.db select(org, "BRCA1", c("ENSEMBL", "GENENAME"), "SYMBOL") ## ------------------------------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene promoters(txdb) ## ------------------------------------------------------------------------------------------------- library(AnnotationHub) hub = AnnotationHub() hub query(hub, c("ensembl", "81.gtf")) hub[["AH48004"]] ## ----SummarizedExperiment------------------------------------------------------------------------- library(SummarizedExperiment) library(airway) data(airway) airway colData(airway) airway[, airway$dex %in% "trt"] chr14 <- as(seqinfo(airway), "GRanges")["14"] airway[airway %over% chr14,] ## ------------------------------------------------------------------------------------------------- library(RNAseqData.HNRNPC.bam.chr14) fls = RNAseqData.HNRNPC.bam.chr14_BAMFILES basename(fls) ## ------------------------------------------------------------------------------------------------- library(GenomicAlignments) bfls = BamFileList(fls) bfl = bfls[[1]] ## ------------------------------------------------------------------------------------------------- ga = readGAlignments(bfl) ga table(strand(ga)) ## ------------------------------------------------------------------------------------------------- tail(sort(table(cigar(ga)))) ga[cigar(ga) != "72M"] ## ------------------------------------------------------------------------------------------------- summarizeJunctions(ga) junctions <- summarizeJunctions(ga, with.revmap=TRUE) ga[ junctions$revmap[[1]] ] ## ------------------------------------------------------------------------------------------------- coverage(bfl)$chr14 ## ------------------------------------------------------------------------------------------------- library(org.Hs.eg.db) ## ------------------------------------------------------------------------------------------------- select(org.Hs.eg.db, "HNRNPC", c("ENTREZID", "GENENAME"), "SYMBOL") sym2eg <- mapIds(org.Hs.eg.db, "HNRNPC", "ENTREZID", "SYMBOL") ## ------------------------------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene ## ------------------------------------------------------------------------------------------------- gns <- genes(txdb) exonsBy(txdb, "gene")[sym2eg] ## ------------------------------------------------------------------------------------------------- library(Rsamtools) param <- ScanBamParam(which=gns[sym2eg]) readGAlignments(bfl, param=param) ## ------------------------------------------------------------------------------------------------- library(airway) data(airway) airway ## ------------------------------------------------------------------------------------------------- x <- assay(airway) class(x) dim(x) head(x) colData(airway) rowRanges(airway) ## ------------------------------------------------------------------------------------------------- symid <- mapIds(org.Hs.eg.db, rownames(airway), "SYMBOL", "ENSEMBL") ## ------------------------------------------------------------------------------------------------- mcols(rowRanges(airway))$symid <- symid ## ------------------------------------------------------------------------------------------------- cidx <- colData(airway)$dex %in% "trt" airway[, cidx] ## shortcut airway[, airway$dex %in% "trt"] ## ------------------------------------------------------------------------------------------------- chr14 <- as(seqinfo(rowRanges(airway)), "GRanges")["14"] ridx <- rowRanges(airway) %over% chr14 airway[ridx,] ## shortcut chr14 <- as(seqinfo(airway), "GRanges")["14"] airway[airway %over% chr14,] ## ------------------------------------------------------------------------------------------------- library(AnnotationHub) hub <- AnnotationHub() query(hub, c("epigenome", "metadata")) meta <- hub[["AH41830"]] ## ------------------------------------------------------------------------------------------------- table(meta$ANATOMY) meta[meta$ANATOMY == "LIVER",] ## ------------------------------------------------------------------------------------------------- query(hub, c("E118", "mnemonic")) E118 <- hub[["AH46971"]] E118 ## ------------------------------------------------------------------------------------------------- table(E118$name) E118[E118$name %in% "Heterochromatin"] ## ----biomart, eval=FALSE-------------------------------------------------------------------------- # library(biomaRt) # head(listMarts(), 3) ## list marts # head(listDatasets(useMart("ensembl")), 3) ## mart datasets # ensembl <- ## fully specified mart # useMart("ensembl", dataset = "hsapiens_gene_ensembl") # # head(listFilters(ensembl), 3) ## filters # myFilter <- "chromosome_name" # substr(filterOptions(myFilter, ensembl), 1, 50) ## return values # myValues <- c("21", "22") # head(listAttributes(ensembl), 3) ## attributes # myAttributes <- c("ensembl_gene_id","chromosome_name") # # ## assemble and query the mart # res <- getBM(attributes = myAttributes, filters = myFilter, # values = myValues, mart = ensembl)