## ----installation, eval=FALSE------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("SETA") ## ----load packages, message=FALSE, warning=FALSE------------------------------ library(SingleCellExperiment) library(SETA) library(ggplot2) library(ggraph) library(tidygraph) library(dplyr) library(ggplot2) library(patchwork) library(TabulaMurisSenisData) ## ----palettes----------------------------------------------------------------- # Set up a color palette for plots # 9-group distinct categoricals d_palette <- c("#3B9AB2", "#6BAFAF", "#9BBDAC", "#C9C171", "#EBCC2A", "#E6A80F", "#E98903", "#F84C00", "#F21A00", "#B10026", "#67000D") ## ----load data, echo = FALSE-------------------------------------------------- sce <- TabulaMurisSenisDroplet(tissues = "Lung")$Lung sce <- sce[, colData(sce)$subtissue != "immune-endo-depleted"] sce$mouse.id <- gsub("/", "", sce$mouse.id) ## ----tabular data exploration------------------------------------------------- table(sce$free_annotation, sce$age) ## ----add lineage information-------------------------------------------------- # We'll make a small mapping from original free_annotation -> broader 'Lineage' lineage_map <- c( "Adventitial Fibroblast" = "Fibroblast", "Airway Smooth Muscle" = "SMC", "Alveolar Epithelial Type 2" = "Epithelial", "Alveolar Fibroblast" = "Fibroblast", "Alveolar Macrophage" = "Myeloid", "Artery" = "Endothelial", "B" = "B cell", "Basophil" = "Granulocyte", "Ccr7+ Dendritic" = "Myeloid", "CD4+ T" = "T cell", "CD8+ T" = "T cell", "Capillary" = "Endothelial", "Capillary Aerocyte" = "Endothelial", "Ciliated" = "Epithelial", "Classical Monocyte" = "Myeloid", "Club" = "Epithelial", "Intermediate Monocyte" = "Myeloid", "Interstitial Macrophage" = "Myeloid", "Lympatic" = "Endothelial", "Ly6g5b+ T" = "T cell", "Myeloid Dendritic Type 1" = "Myeloid", "Myeloid Dendritic Type 2" = "Myeloid", "Myofibroblast" = "Fibroblast", "Natural Killer" = "NK", "Natural Killer T" = "NK", "Neuroendocrine" = "Neuroendocrine", "Neutrophil" = "Granulocyte", "Nonclassical Monocyte" = "Myeloid", "Pericyte" = "Endothelial", "Plasma" = "Plasma Cell", "Plasmacytoid Dendritic" = "Myeloid", "Proliferating Alveolar Macrophage" = "Proliferating", "Proliferating Classical Monocyte" = "Proliferating", "Proliferating Dendritic" = "Proliferating", "Proliferating NK" = "Proliferating", "Proliferating T" = "Proliferating", "Regulatory T" = "T cell", "Vein" = "Endothelial", "Zbtb32+ B" = "B cell" ) # Add a new column in named 'Lineage' free_annotations <- as.character(sce$free_annotation) sce$Lineage <- lineage_map[free_annotations] ## ----create taxonomy DF------------------------------------------------------- # In a Seurat Object, barcodes are in the rownames of the metadata. # Similar to setaCounts, taxonomy DF creation can use rownames as bc. # Then we ensure we have columns for 'free_annotation' and 'Lineage'. print(head(sce$free_annotation)) print(head(sce$Lineage)) # We want a data frame that includes bc + 'free_annotation' + 'Lineage' # Then we'll pass it to setaTaxonomyDF tax_df <- setaTaxonomyDF( obj = data.frame(colData(sce)), resolution_cols = c("Lineage", "free_annotation"), bc_col = "cell" ) # Each row is a unique 'free_annotation'. # The last column 'free_annotation' is the finer label. # cols must be entered in order of increasing resolution (broadest to finest) tax_df ## ----ggraph, message=FALSE, warning=FALSE, fig.width = 8, fig.height = 6------ # Create a tidygraph object using SETA built-in utils tbl_g <- taxonomy_to_tbl_graph( tax_df, columns = c("Lineage", "free_annotation")) # Plot with ggraph ggraph(tbl_g, layout = "dendrogram") + geom_edge_diagonal2(aes(color = node.Lineage)) + geom_node_text(aes(filter = leaf, label = free_annotation, color = Lineage), nudge_y = 0.1, vjust = 0.5, hjust = 0, size = 5) + geom_node_text(aes(filter = !leaf, label = Lineage), color = 'black', size = 5, repel = TRUE) + guides(edge_colour = guide_legend(title = "Lineage"), color = guide_legend(title = "Lineage")) + theme_linedraw(base_size = 16) + scale_edge_color_manual(values = d_palette) + scale_color_manual(values = d_palette) + scale_y_reverse(breaks = seq(0, 2, by = 1), labels = c("Type", "Lineage", "Root")) + theme(axis.text.y = element_blank()) + expand_limits(y = -5) + coord_flip() + ggtitle("SETA: Taxonomy") ## ----counts and metadata------------------------------------------------------ # Create sample x free_annotation counts: taxa_counts <- setaCounts( data.frame(colData(sce)), cell_type_col = "free_annotation", sample_col = "mouse.id", bc_col = "rownames" ) # Create SETA sample-level metadata meta_df <- setaMetadata(data.frame(colData(sce)), sample_col = "mouse.id", meta_cols = c("age", "sex")) bal_out <- setaTransform( counts = taxa_counts, method = "balance", balances = list( epi_vs_myeloid = list( num = "Epithelial", denom = "Myeloid" ) ), taxonomyDF = tax_df, taxonomy_col = "Lineage" ) # merge the balances with sample-level metadata bal_df <- data.frame( sample_id = rownames(bal_out$counts), epi_vs_myeloid = as.numeric(bal_out$counts[, "epi_vs_myeloid"]), stringsAsFactors = FALSE ) |> dplyr::left_join(meta_df, by = "sample_id") # Compare age groups by epithelial to myeloid ratios library(ggplot2) ggplot(bal_df, aes(x = age, y = epi_vs_myeloid, fill = age)) + geom_boxplot(outlier.shape = NA, alpha = 0.4) + geom_jitter(width = 0.15, size = 2) + scale_fill_manual(values = d_palette) + labs(title = "Epithelial / Myeloid balance across age groups", y = "log GM(Epithelial) - log GM(Myeloid)", x = "Age (months)") + theme_minimal(base_size = 14) + theme(legend.position = "none") ## ----reference frame calculation---------------------------------------------- # We'll transform them with a 'Lineage' grouping. # The taxonomyDF uses rownames = free_annotation # so that colnames(taxa_counts) align with rownames(tax_df). refframe_out <- setaTransform( counts = taxa_counts, method = "CLR", taxonomyDF = tax_df, taxonomy_col = "Lineage", within_resolution = TRUE ) refframe_out$within_resolution refframe_out$grouping_var # Compare to a standard global CLR transform: global_out <- setaTransform(taxa_counts, method = "CLR") ## ----latent reference frames, fig.width = 10, fig.height = 6------------------ # A) Global CLR latent_global <- setaLatent(global_out, method = "PCA", dims = 2) pca_global <- latent_global$latentSpace pca_global$sample_id <- rownames(pca_global) # B) Within-Lineage CLR latent_lineage <- setaLatent(refframe_out, method = "PCA", dims = 2) pca_lineage <- latent_lineage$latentSpace pca_lineage$sample_id <- rownames(pca_lineage) # Merge with metadata pca_global <- left_join(pca_global, meta_df) pca_lineage <- left_join(pca_lineage, meta_df) # Plot side by side p1 <- ggplot(pca_global, aes(x = PC1, y = PC2, color = age)) + geom_text(aes(label = sample_id)) + scale_color_manual( values = d_palette ) + labs(title = "Global CLR PCA", x = "PC1", y = "PC2", color = "Age (Months)") + theme_minimal() + xlab(sprintf("PC1 (%s%%)", signif(latent_global$varExplained[1], 4) * 100)) + ylab(sprintf("PC2 (%s%%)", signif(latent_global$varExplained[2], 4) * 100)) + theme(legend.position = 'none') p2 <- ggplot(pca_lineage, aes(x = PC1, y = PC2, color = age)) + geom_text(aes(label = sample_id)) + scale_color_manual( values = d_palette ) + labs(title = "Within-Lineage CLR PCA", x = "PC1", y = "PC2", color = "Age (Months)") + theme_minimal() + xlab(sprintf("PC1 (%s%%)", signif(latent_lineage$varExplained[1], 4) * 100)) + ylab(sprintf("PC2 (%s%%)", signif(latent_lineage$varExplained[2], 4) * 100)) p1 + p2 ## ----libraries used----------------------------------------------------------- sessionInfo()