## ------------------------------------------------------------------------ library(h5vc) library(rhdf5) library(GenomicRanges) tallyFile <- system.file( "extdata", "example.tally.hfs5", package = "h5vcData" ) sampleData <- getSampleData( tallyFile, "/ExampleStudy/16" ) ## ----, eval = FALSE, tidy = FALSE---------------------------------------- ## callVariantsPaired <- function (data, sampledata, cl = vcConfParams()) ## ------------------------------------------------------------------------ vcConfParams() ## ----, tidy=FALSE-------------------------------------------------------- calls16 <- h5dapply( filename = tallyFile, group = "/ExampleStudy/16", blocksize = 10000, FUN = callVariantsPaired, sampledata = sampleData, cl = vcConfParams( returnDataPoints = TRUE, annotateWithBackground = TRUE ), names = c( "Coverages", "Counts", "Reference" ), range = c(29950000,30000000) ) calls22 <- h5dapply( filename = tallyFile, group = "/ExampleStudy/22", blocksize = 10000, FUN = callVariantsPaired, sampledata = sampleData, cl = vcConfParams( returnDataPoints = TRUE, annotateWithBackground = TRUE ), names = c( "Coverages", "Counts", "Reference" ), range = c(39350000,39400000) ) calls <- rbind(do.call(rbind,calls16), do.call(rbind,calls22)) ## ------------------------------------------------------------------------ calls$Patient = sampleData$Patient[ match( calls$Sample, sampleData$Sample) ] calls <- subset( calls, pValueFwd < 0.05 & pValueRev < 0.05 ) calls$varID = paste( calls$seqnames, calls$start, calls$altAllele, calls$Patient ) calls <- calls[!duplicated(calls$varID),] calls ## ----, fig.width=18, fig.height=10--------------------------------------- windowsize <- 35 for( i in seq(nrow(calls))){ position <- calls$Start[i] data <- h5dapply( filename = tallyFile, group = paste( "/ExampleStudy", calls$Chrom[i], sep="/" ), blocksize = windowsize * 3, names = c("Coverages","Counts","Deletions"), range = c( position - windowsize, position + windowsize) )[[1]] p <- mismatchPlot( data = data, sampledata = sampleData, samples = sampleData$Sample[sampleData$Patient == calls$Patient[i]], windowsize = windowsize, position = position ) print(p) } ## ----, fig.width=18, fig.height=10--------------------------------------- library(h5vc) library(ggplot2) tallyFile <- system.file( "extdata", "example.tally.hfs5", package = "h5vcData" ) sampleData <- getSampleData( tallyFile, "/ExampleStudy/16" ) position <- 29979629 windowsize <- 30 samples <- sampleData$Sample[sampleData$Patient == "Patient8"] data <- h5dapply( filename = tallyFile, group = "/ExampleStudy/16", blocksize = windowsize * 3, names = c("Coverages", "Counts", "Deletions"), range = c(position - windowsize, position + windowsize) )[[1]] #choose blocksize larger than range so that all needed data is collected as one block # Summing up all mismatches irrespective of the alternative allele data$CountsAggregate = colSums(data$Counts) # Simple overview plot showing number of mismatches per position p <- ggplot() + geom_h5vc( data=data, sampledata=sampleData, windowsize = 35, position = 500, dataset = "Coverages", fill = "gray" ) + geom_h5vc( data=data, sampledata=sampleData, windowsize = 35, position = 500, dataset = "CountsAggregate", fill = "#D50000" ) + facet_wrap( ~ Sample, ncol = 2 ) print(p) ## ----variantCallingForMutationSpectrum, eval=FALSE----------------------- ## # loading library and example data ## library(h5vc) ## tallyFile <- system.file( "extdata", "example.tally.hfs5", package = "h5vcData" ) ## sampleData <- getSampleData( tallyFile, "/ExampleStudy/16" ) ## windowsize <- 100000 ## samples <- sampleData$Sample[sampleData$Patient == "Patient8"] ## # Calling Variants ## vars <- h5dapply( ## filename = tallyFile, ## group = "/ExampleStudy/16", ## blocksize = 10000, ## FUN = callVariantsPaired, ## sampledata = sampleData, ## cl = vcConfParams(returnDataPoints=TRUE), ## names = c("Coverages", "Counts", "Reference"), ## range = c(29000000, 30000000), ## verbose=TRUE ## ) ## ## variantCalls <- do.call(rbind, vars) ## ## vars <- h5dapply( ## filename = tallyFile, ## group = "/ExampleStudy/22", ## blocksize = 10000, ## FUN = callVariantsPaired, ## sampledata = sampleData, ## cl = vcConfParams(returnDataPoints=TRUE), ## names = c("Coverages", "Counts", "Reference"), ## range = c(38000000, 40000000), ## verbose=TRUE ## ) ## ## variantCalls <- rbind( variantCalls, do.call(rbind, vars) ) ## rownames(variantCalls) <- NULL ## ## save(variantCalls, file = "example.variants.RDa") ## ------------------------------------------------------------------------ data("example.variants", package="h5vcData") ms <- mutationSpectrum( variantCalls, tallyFile, "/ExampleStudy" ) head(ms) ## ----mutationSpectrumPlot, fig.width=18, fig.height=10------------------- suppressPackageStartupMessages(library(ggplot2)) p <- ggplot( ms, aes( x = paste( Prefix, Suffix, sep = "." ), fill = Transition) ) + geom_bar(width = 0.5) + facet_grid( Sample ~ Transition) + scale_y_discrete(name="Number of Events") + scale_x_discrete(name="Mutation Context") + theme( axis.text = element_text( size = 14 ), axis.title = element_text( size = 16 ), axis.text.x = element_text( angle = 90, hjust = 0 ), strip.text = element_text(size = 14), legend.position="none" ) print(p)