## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- BiocStyle::markdown() options(width=100, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE-------------------------------------------- suppressPackageStartupMessages({ library(Biostrings) library(GenomicRanges) library(GenomicFiles) library(BiocParallel) library(TxDb.Hsapiens.UCSC.hg19.knownGene) }) ## ----iranges-------------------------------------------------------------------------------------- library(IRanges) ir <- IRanges(start=c(10, 20, 30), width=5) ir ## ----iranges-flank-------------------------------------------------------------------------------- flank(ir, 3) ## ----iranges-class-------------------------------------------------------------------------------- class(ir) getClass(class(ir)) ## ----iranges-flank-method, eval=FALSE------------------------------------------------------------- # ?"flank,Ranges-method" ## ----granges-------------------------------------------------------------------------------------- library(GenomicRanges) gr <- GRanges(c("chr1", "chr1", "chr2"), ir, strand=c("+", "-", "+")) gr ## ----granges-flank-------------------------------------------------------------------------------- flank(gr, 3) ## ----granges-class-------------------------------------------------------------------------------- class(gr) getClass(class(gr)) ## ----granges-flank-method, eval=FALSE------------------------------------------------------------- # ?"flank,GenomicRanges-method" ## ----granges-methods------------------------------------------------------------------------------ methods(class="GRanges") ## ----granges-man-and-vignettes, eval=FALSE-------------------------------------------------------- # help(package="GenomicRanges") # vignette(package="GenomicRanges") # vignette(package="GenomicRanges", "GenomicRangesHOWTOs") ## ----txdb----------------------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene ## ----txdb-exons----------------------------------------------------------------------------------- exons(txdb) ## ----txdb-exonsby--------------------------------------------------------------------------------- exonsBy(txdb, "tx") ## ----BSgenome-require, message=FALSE-------------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) chr14_range = GRanges("chr14", IRanges(1, seqlengths(Hsapiens)["chr14"])) chr14_dna <- getSeq(Hsapiens, chr14_range) letterFrequency(chr14_dna, "GC", as.prob=TRUE) ## ----bam-require---------------------------------------------------------------------------------- library(GenomicRanges) library(GenomicAlignments) library(Rsamtools) ## our 'region of interest' roi <- GRanges("chr14", IRanges(19653773, width=1)) ## sample data library('RNAseqData.HNRNPC.bam.chr14') bf <- BamFile(RNAseqData.HNRNPC.bam.chr14_BAMFILES[[1]], asMates=TRUE) ## alignments, junctions, overlapping our roi paln <- readGAlignmentsList(bf) j <- summarizeJunctions(paln, with.revmap=TRUE) j_overlap <- j[j %over% roi] ## supporting reads paln[j_overlap$revmap[[1]]] ## ----vcf, message=FALSE--------------------------------------------------------------------------- ## input variants library(VariantAnnotation) fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation") vcf <- readVcf(fl, "hg19") seqlevels(vcf) <- "chr22" ## known gene model library(TxDb.Hsapiens.UCSC.hg19.knownGene) coding <- locateVariants(rowRanges(vcf), TxDb.Hsapiens.UCSC.hg19.knownGene, CodingVariants()) head(coding) ## ----genomicalignments---------------------------------------------------------------------------- ## example BAM data library(RNAseqData.HNRNPC.bam.chr14) ## one BAM file fl <- RNAseqData.HNRNPC.bam.chr14_BAMFILES[1] ## Let R know that this is a BAM file, not just a character vector library(Rsamtools) bfl <- BamFile(fl) ## ----readgalignments------------------------------------------------------------------------------ aln <- readGAlignments(bfl) aln ## ----galignments-methods-------------------------------------------------------------------------- methods(class=class(aln)) ## ----iteration------------------------------------------------------------------------------------ library(GenomicFiles) yield <- function(bfl) { ## input a chunk of alignments library(GenomicAlignments) readGAlignments(bfl, param=ScanBamParam(what="seq")) } map <- function(aln) { ## Count G or C nucleotides per read library(Biostrings) gc <- letterFrequency(mcols(aln)$seq, "GC") ## Summarize number of reads with 0, 1, ... G or C nucleotides tabulate(1 + gc, 73) # max. read length: 72 } reduce <- `+` ## ----iteration-doit------------------------------------------------------------------------------- library(RNAseqData.HNRNPC.bam.chr14) fls <- RNAseqData.HNRNPC.bam.chr14_BAMFILES bf <- BamFile(fls[1], yieldSize=100000) gc <- reduceByYield(bf, yield, map, reduce) plot(gc, type="h", xlab="GC Content per Aligned Read", ylab="Number of Reads") ## ----parallel-doit-------------------------------------------------------------------------------- library(BiocParallel) gc <- bplapply(BamFileList(fls), reduceByYield, yield, map, reduce) library(ggplot2) df <- stack(as.data.frame(lapply(gc, cumsum))) df$GC <- 0:72 ggplot(df, aes(x=GC, y=values)) + geom_line(aes(colour=ind)) + xlab("Number of GC Nucleotides per Read") + ylab("Number of Reads")