## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ set.seed(1) options(width = 120) library(knitr) opts_chunk$set(comment = " ", fig.path = "", fig.align = "center", out.width = "60%", indent = 10, cache = FALSE, cache.path = "../cache", dpi = 300) ## out.width = "1\\columnwidth" ## uncomment options for HTML output, one of them breaks the image export ## background = "#FFFFFF", dev = 'pdf' ## ----setup, results='hide', echo=FALSE, message=FALSE, warning=FALSE----- library(knitr) # to base64 encode images opts_knit$set(upload.fun = image_uri) knit_hooks$set(fig.cap = function(before, options, envir) { if(!before) { paste('

',options$fig.cap,"

",sep="") } }) ## ----load_ss, results='hide',message=FALSE------------------------------- library(SomaticSignatures) ## ----load_supporting_packages, results='hide',message=FALSE-------------- library(GenomicRanges) library(VariantAnnotation) library(ggplot2) library(stringr) ## ----load_data_package, results='hide',message=FALSE--------------------- library(SomaticCancerAlterations) library(BSgenome.Hsapiens.UCSC.hg19) ## ----sca_metadata-------------------------------------------------------- sca_metadata = scaMetadata() print(sca_metadata) ## ----sca_load_pool------------------------------------------------------- sca_all = scaLoadDatasets() sca_merge = unlist(sca_all) short_names = str_split_fixed(rownames(sca_metadata), "_", 2)[ ,1] names(sca_merge) = sca_merge$study = factor(rep(short_names, times = elementLengths(sca_all))) sca_merge = sca_merge[ sca_merge$Variant_Type %in% "SNP" ] sca_merge = keepSeqlevels(sca_merge, hsAutosomes()) ## ----sca_study_table----------------------------------------------------- sort(table(sca_merge$study), decreasing = TRUE) ## ----sca_variant_classification_table------------------------------------ sort(table(sca_merge$Variant_Classification), decreasing = TRUE) ## ----sca_to_vranges------------------------------------------------------ sca_vr = VRanges( seqnames(sca_merge), ranges(sca_merge), ref = sca_merge$Reference_Allele, alt = sca_merge$Tumor_Seq_Allele2, seqinfo = seqinfo(sca_merge)) sca_vr = ucsc(sca_vr) head(sca_vr, 3) ## ----sca_vr_to_motifs---------------------------------------------------- sca_motifs = mutationContext(sca_vr, BSgenome.Hsapiens.UCSC.hg19, unify = TRUE) ## ----sca_add_vars-------------------------------------------------------- sca_motifs$study = sca_merge$study head(sca_motifs, 3) ## ----sca_motif_occurrence------------------------------------------------ sca_occurrence = mutationContextMatrix(sca_motifs, group = "study", normalize = TRUE) head(round(sca_occurrence, 4)) ## ----sca_plot_samples_observed, fig.cap='Observed frequency of somatic motifs, also termed somatic spectrum, across studies.'---- plotSamplesObserved(sca_motifs) ## ----sca_nmf_pca--------------------------------------------------------- n_sigs = 5 sigs_nmf = nmfSignatures(sca_occurrence, r = n_sigs) sigs_pca = pcaSignatures(sca_occurrence, r = n_sigs) ## ----sca_explore_nmf----------------------------------------------------- names(sigs_nmf) sapply(sigs_nmf, dim) head(sigs_nmf$w, 3) head(sigs_nmf$h, 3) ## ----sca_plot_nmf_signatures_map, fig.cap='Composition of somatic signatures estimated with the NMF, represented as a heatmap.'---- plotSignatureMap(sigs_nmf) + ggtitle("Somatic Signatures: NMF - Heatmap") ## ----sca_plot_nmf_signatures, fig.cap='Composition of somatic signatures estimated with the NMF, represented as a barchart.'---- plotSignatures(sigs_nmf) + ggtitle("Somatic Signatures: NMF - Barchart") ## ----sca_plot_nmf_samples_map, fig.cap='Occurrence of signatures estimated with the NMF, represented as a heatmap.'---- plotSampleMap(sigs_nmf) ## ----sca_plot_nmf_samples, fig.cap='Occurrence of signatures estimated with the NMF, represented as a barchart.'---- plotSamples(sigs_nmf) ## ----sca_plot_pca_signatures_map, fig.cap='Composition of somatic signatures estimated with the PCA, represented as a heatmap'---- plotSignatureMap(sigs_pca) + ggtitle("Somatic Signatures: PCA") ## ----sca_plot_pca_signatures, fig.cap='Composition of somatic signatures estimated with the PCA, represented as a barchart.'---- plotSignatures(sigs_pca) ## ----sca_plot_pca_samples_map, fig.cap='Occurrence of signatures estimated with the PCA, represented as a heatmap.'---- plotSampleMap(sigs_pca) ## ----sca_plot_pca_samples, fig.cap='Occurrence of signatures estimated with the PCA, represented as a barchart.'---- plotSamples(sigs_pca) ## ----sva_batch_not_run, eval=FALSE--------------------------------------- ## library(sva) ## library(stringr) ## ## df = as(sca_metadata, "data.frame") ## sample x covariable ## pheno = data.frame(s = unlist(df[ ,"Sequence_Source"]), c = unlist(df[ ,"Cancer_Type"])) ## rownames(pheno) = str_split_fixed(rownames(pheno), "_", 2)[ ,1] ## mod = model.matrix(~ s + c, data = pheno) ## mod0 = model.matrix(~ c, data = pheno) ## ## sv = sva(sca_occurrence, mod, mod0, method = "irw") ## ----kmer_chr1----------------------------------------------------------- k = 3 n = 1e5 chrs = "chr1" chr1_ranges = as(seqinfo(BSgenome.Hsapiens.UCSC.hg19), "GRanges") chr1_ranges = keepSeqlevels(chr1_ranges, chrs) k3_chr1 = kmerFrequency(BSgenome.Hsapiens.UCSC.hg19, n, k, chr1_ranges) k3_chr1 ## ----normalize_motifs---------------------------------------------------- head(sca_occurrence) data(kmers) norms = k3wg / k3we head(norms) sca_norm = normalizeMotifs(sca_occurrence, norms) head(sca_norm) ## ----eval=FALSE---------------------------------------------------------- ## source("http://bioconductor.org/biocLite.R") ## biocLite("SomaticSignatures") ## ----results='hide', message=FALSE--------------------------------------- library(VariantAnnotation) ## ------------------------------------------------------------------------ vr = VRanges( seqnames = "chr1", ranges = IRanges(start = 1000, width = 1), ref = "A", alt = "C") vr ## ----echo=FALSE, results='markup'---------------------------------------- sessionInfo()