## ----experimentPrep, eval=FALSE--------------------------------------------
#  
#  library(BiocParallel)
#  dir.create("fastq", showWarnings=FALSE)
#  
#  extractSRA <- function( sra_accession, exe_path = 'fastq-dump',
#                          args = '--split-3 --gzip', outdir = 'fastq',
#                          dry_run = FALSE)
#  {
#      cmdline = sprintf('%s %s --outdir %s %s',
#                        exe_path, args, outdir, sra_accession)
#      if(dry_run) {
#        message("will run with this command line:\n",cmdline)
#      } else {
#        return( system( cmdline ) )
#      }
#  }
#  
#  samples <- c( "SRR5273705", "SRR5273689", "SRR5273699", "SRR5273683" )
#  
#  bplapply( samples, extractSRA, BPPARAM=MulticoreParam(4) )
#  
#  annotation <-
#      data.frame(
#          samples,
#          tissue=c("brain", "brain", "heart", "heart" ) )
#  

## ----downloadReference, eval=FALSE-----------------------------------------
#  dir.create("reference/raw", recursive=TRUE, showWarnings=FALSE)
#  download.file("ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_mouse/release_M16/gencode.vM16.transcripts.fa.gz",
#                destfile="reference/raw/transcriptome.fa.gz")

## ----indexBuild, eval=FALSE------------------------------------------------
#  
#  dir.create("reference/index", showWarnings=FALSE)
#  
#  system("kallisto index -i reference/index/kallistoIdx.idx reference/raw/transcriptome.fa.gz")
#  system("salmon index -t reference/raw/transcriptome.fa.gz -i reference/index/salmon_index")
#  system("gunzip -c reference/raw/transcriptome.fa.gz > reference/raw/transcriptome.fa && sailfish index -t reference/raw/transcriptome.fa -o reference/index/sailfish_index")
#  
#  library(Biostrings)
#  dnSt <- names( readDNAStringSet("reference/raw/transcriptome.fa.gz") )
#  dnSt <- sapply( strsplit( dnSt, "\\||\\." ), "[[", 1 )
#  

## ----versions, eval=FALSE--------------------------------------------------
#  
#  library(SummarizedBenchmark)
#  
#  getKallistoVersion <- function(){
#      vers <-
#        suppressWarnings( system( "kallisto", intern=TRUE )[1] )
#      strsplit( vers, " " )[[1]][2]
#  }
#  
#  getSalmonVersion <- function(){
#      vers <-
#        suppressWarnings( system( "salmon --version 2>&1", intern=TRUE)[1] )
#      strsplit( vers, " " )[[1]][2]
#  }
#  
#  getSailfishVersion <- function(){
#      vers <-
#        suppressWarnings( system( "sailfish --version 2>&1", intern=TRUE)[1] )
#      strsplit( vers, " " )[[1]][3]
#  }
#  

## ---- eval=FALSE-----------------------------------------------------------
#  
#  dir.create("out/kallisto", showWarnings=FALSE)
#  dir.create("out/salmon", showWarnings=FALSE)
#  dir.create("out/sailfish", showWarnings=FALSE)
#  
#  runKallisto <- function( sample, args="" ){
#      fastqFile1 <- sprintf( "fastq/%s_1.fastq.gz", sample )
#      fastqFile2 <- gsub( "_1", "_2", fastqFile1 )
#      output <- sprintf("out/kallisto/%s.out", sample)
#      cmd <- sprintf( "kallisto quant -i reference/index/kallistoIdx.idx -o %s %s %s %s",
#                      output, args, fastqFile1, fastqFile2 )
#      system( cmd )
#      require(tximport)
#      ab <-
#        tximport( file.path(output, "abundance.h5"),
#                  type="kallisto", txOut=TRUE )
#      counts <- ab$counts[,1]
#      names(counts) <-
#        sapply( strsplit( names( counts ), "\\||\\." ), "[[", 1 )
#      counts
#  }
#  
#  runSalmon <- function( sample, args="-l A -p 4" ){
#      fastqFile1 <- sprintf( "fastq/%s_1.fastq.gz", sample )
#      fastqFile2 <- gsub( "_1", "_2", fastqFile1 )
#      output <- sprintf("out/salmon/%s.out", sample)
#      cmd <- sprintf("salmon quant -i reference/index/salmon_index %s -o %s -1 %s -2 %s",
#                     args, output, fastqFile1, fastqFile2)
#      system( cmd )
#      require(tximport)
#      counts <-
#        tximport( file.path( output, "quant.sf" ),
#                  type="salmon", txOut=TRUE )$counts[,1]
#      names( counts ) <-
#        sapply( strsplit( names( counts ), "\\||\\." ), "[[", 1 )
#      counts
#  }
#  
#  runSailfish <- function( sample, args="-l IU" ){
#      fastqFile1 <- sprintf( "fastq/%s_1.fastq.gz", sample )
#      fastqFile2 <- gsub( "_1", "_2", fastqFile1 )
#      output <- sprintf("out/sailfish/%s.out", sample)
#      cmd <- sprintf( "echo \"sailfish quant -i reference/index/sailfish_index %s -o %s -1 <(zcat %s) -2 <(zcat %s)\" | bash",
#                      args, output, fastqFile1, fastqFile2 )
#      cat(cmd)
#      system( cmd )
#      counts <-
#        tximport( file.path(output, "quant.sf"),
#                  type="sailfish", txOut=TRUE )$counts[,1]
#      names( counts ) <-
#        sapply( strsplit( names( counts ), "\\||\\." ), "[[", 1 )
#      counts
#  }
#  

## ---- eval=FALSE-----------------------------------------------------------
#  
#  library(SummarizedBenchmark)
#  library(tximport)
#  
#  b <- BenchDesign() %>%
#      addMethod(label = "kallisto-default",
#                func = runKallisto,
#                params = rlang::quos(sample = sample,
#                                     args = "-t 16"),
#                meta = list(version = rlang::quo(getKallistoVersion()))
#                ) %>%
#      addMethod(label = "kallisto-bias",
#               func = runKallisto,
#               params = rlang::quos(sample = sample,
#                                    args = "--bias -t 16"),
#               meta = list(version = rlang::quo(getKallistoVersion()))
#               ) %>%
#     addMethod(label = "salmon-default",
#               func = runSalmon,
#               params = rlang::quos(sample = sample,
#                                    args = "-l IU -p 16"),
#               meta = list(version = rlang::quo(getSalmonVersion()))
#               ) %>%
#     addMethod(label = "salmon-gcBias",
#               func = runSalmon,
#               params = rlang::quos(sample=sample,
#                                    args="-l IU --gcBias -p 16"),
#               meta = list(version = rlang::quo(getSalmonVersion()))
#               ) %>%
#     addMethod(label = "sailfish-default",
#               func = runSailfish,
#               params = rlang::quos(sample=sample,
#                                    args="-l IU -p 16"),
#               meta = list(version = rlang::quo(getSailfishVersion()))
#               )
#  
#  printMethods(b)

## ----runBenchmark, eval=FALSE----------------------------------------------
#  
#  dat <- list(txIDs=dnSt)
#  allSB <- lapply(samples, function(sample) {
#      dat[["sample"]] <- sample
#      sb <- buildBench( b, data=dat, sortIDs="txIDs",
#                       catchErrors=FALSE, parallel=TRUE,
#                       BPPARAM=SerialParam() )
#      colData( sb )$sample <- sample
#      colData( sb )$tissue <-
#                      as.character( annotation$tissue[annotation$sample == sample] )
#      sb
#  } )
#  
#  

## ----subsample, eval=FALSE-------------------------------------------------
#  
#  keepRows <- rowSums(
#      sapply( allSB,
#             function(x){
#                 rowSums( is.na( assay( x ) ) )
#             }) ) == 0
#  allSB <- lapply( allSB,
#                  function(x){
#                      x[keepRows,]
#                  } )
#  
#  set.seed(12)
#  keepRows <- sample(
#      seq_len(nrow(allSB[[1]])), 50000 )
#  
#  allSB <- lapply( allSB,
#                  function(x){
#                      x[keepRows,]
#                  } )
#  
#  #save(allSB, file="../data/quantSB.rda",
#  #     compress="xz", compression_level=9 )
#  

## ----loadSB, message=FALSE-------------------------------------------------
library(SummarizedBenchmark)
data("quantSB")

## ----metadata--------------------------------------------------------------
colData(allSB[[1]])[,c("param.args", "meta.version", "sample", "tissue")]

## ----pcaPlot---------------------------------------------------------------

keep <- !rowSums( is.na( assays( allSB[[1]] )[["default"]] ) ) > 0

pcaRes <- 
  prcomp(  log10( t( assays( allSB[[1]] )[["default"]][keep,] ) + 1 ) )
varExp <- round( 100*(pcaRes$sdev/sum( pcaRes$sdev )), 2)

tidyData <- data.frame( 
  PC1=pcaRes$x[,"PC1"], 
  PC2=pcaRes$x[,"PC2"], 
  sample=colData( allSB[[1]] )$sample, 
  label=gsub("\\d+$", "", rownames( colData(allSB[[1]]) ) ) )

tidyData <- tidyData %>%
  dplyr::mutate( 
             method=gsub( "-.*$", "", label) )


tidyData %>%
  ggplot( aes( PC1, PC2, colour=label ) ) +
  geom_point() + coord_fixed() +
  ylab(sprintf( "PC2 (%0.2f %%)", varExp[2]) ) +
  xlab(sprintf( "PC1 (%0.2f %%)", varExp[1]) ) +
  theme(legend.pos="top") +
  guides(col = guide_legend(nrow = 5), 
         shape = guide_legend(nrow = 4))


## ----rnaseqcomp, message=FALSE---------------------------------------------
library( rnaseqcomp )
library( biomaRt )
data( simdata )
houseHuman <- simdata$meta$gene[simdata$meta$house]
houseHuman <- gsub("\\.\\d+", "", houseHuman )

mart <- useMart( "ensembl", "mmusculus_gene_ensembl" )
geneMap <- getBM( c( "ensembl_transcript_id", 
                     "hsapiens_homolog_ensembl_gene", 
                     "hsapiens_homolog_orthology_type" ), 
                  mart=mart )
geneMap <- 
  geneMap[geneMap$`hsapiens_homolog_orthology_type` == "ortholog_one2one",]
houseMouse <- 
  geneMap$ensembl_transcript_id[geneMap$hsapiens_homolog_ensembl_gene %in% 
                                  houseHuman]

allSB <- do.call( cbind, allSB )
colData( allSB )$label <- gsub("\\d+$", "", rownames( colData( allSB ) ) )

condInfo <- 
    colData( allSB )[colData( allSB )$label == "kallisto-default","tissue"]


replicateInfo <- condInfo
evaluationFeature <- rep( TRUE, length.out = nrow( allSB ) )

calibrationFeature <- rownames(allSB) %in% houseMouse

unitReference <- 1
quantificationList <- lapply( 
  split( seq_along( colnames( allSB ) ), colData( allSB )$label ), 
  function(x){
    assay( allSB )[,x]
  } )

compObject <- signalCalibrate( 
  quantificationList, 
  factor(condInfo), factor(replicateInfo), 
  evaluationFeature, calibrationFeature, unitReference, 
  calibrationFeature2 = calibrationFeature)

compObject