## ----loadtallyfile------------------------------------------------------- library(h5vc) tallyFile <- system.file( "extdata", "example.tally.hfs5", package = "h5vcData" ) ## ----checktallyfile------------------------------------------------------ library(rhdf5) h5ls(tallyFile) ## ----checksampledata----------------------------------------------------- sampleData <- getSampleData( tallyFile, "/ExampleStudy/16" ) print(sampleData) ## ----directdataextraction------------------------------------------------ data <- h5dapply( filename = tallyFile, group = "/ExampleStudy/16", names = c( "Coverages" ), blocksize = 10000, range = c(29000000,29010000) ) str(data) ## ----coverageexample----------------------------------------------------- data <- h5dapply( filename = tallyFile, group = "/ExampleStudy/16", names = c( "Coverages" ), blocksize = 1000, range = c(29000000,29010000), FUN = function(x) rowSums(x$Coverages), verbose = TRUE ) coverages <- do.call( rbind, data ) colnames(coverages) <- sampleData$Sample[order(sampleData$Column)] coverages ## ----binnedCoverageExample----------------------------------------------- data <- h5dapply( filename = tallyFile, group = "/ExampleStudy/22", names = c( "Coverages" ), blocksize = 500, range = c(38750000,39250000), FUN = binnedCoverage, sampledata = sampleData ) data <- do.call(rbind, data) rownames(data) = NULL head(data) ## ----sizeFactors--------------------------------------------------------- librarySizes <- colSums(data[,1:6]) sizeFactors <- librarySizes / mean(librarySizes) sizeFactors ## ----adjustForSizeFactors------------------------------------------------ for( n in names(sizeFactors) ){ data[[n]] <- data[[n]] / sizeFactors[n] } ## ----plotLog2Ratios, fig.width=18, fig.height=10------------------------- suppressPackageStartupMessages(library(reshape)) suppressPackageStartupMessages(library(ggplot2)) pairWiseRatio <- function( data, sampledata, trans = log2 ){ sampledata <- subset( sampledata, Sample %in% colnames(data)) ret <- data.frame(Start = data$Start) for( patient in sampledata$Patient){ control <- subset(sampledata, Patient == patient & Type == "Control")$Sample if( length(control) != 1 ){ stop(paste("Exactly one control sample per patient is expected. Found", length(control), "in patient", patient) ) } for( case in subset(sampledata, Patient == patient & Type == "Case")$Sample){ ret[[ paste( case, control, sep="/") ]] <- trans( data[[case]] / data[[control]] ) } } return(ret) } plot_data <- melt(pairWiseRatio( data, sampleData ), id.vars=c("Start")) colnames(plot_data) = c("Position", "Samples", "Ratio") ggplot( plot_data, aes( x = Position, y = Ratio, color = Samples ) ) + geom_line() + facet_wrap( ~Samples, ncol = 1)