BLASE for annotating scRNA-seq

0.1 Load Data
0.2 Prepare BLASE
- 0.2.1 Create BLASE data object
- 0.2.2 Select Genes
0.3 Calculate Mappings
0.4 Transfer Mappings
0.5 Session Info

library(scater)
library(ggplot2)
library(BiocParallel)
library(blase)
library(patchwork)

RNGversion("3.5.0")
#> Warning in RNGkind("Mersenne-Twister", "Inversion", "Rounding"): non-uniform
#> 'Rounding' sampler used
SEED <- 7
set.seed(SEED)

N_CORES <- 2
bpparam <- MulticoreParam(N_CORES)

This article will show how BLASE can be used for annotating scRNA-seq data using existing bulk or microarray data. We make use of scRNA-seq Dogga et al. 2024 and microarray data from Painter et al. 2018. Code for generating the objects used here is available in the BLASE reproducibility repository.

0.1 Load Data

First we’ll load in the data we’re using, pre-prepared from the BLASE reproducibility repository.

data(painter_microarray, package = "blase")
data(MCA_PF_SCE, package = "blase")

We can examine the true lifecycle stages, and also the calculated pseudotime trajectory (Slingshot).

plotUMAP(MCA_PF_SCE, colour_by = "STAGE_HR")

UMAP of Dogga et al. single-cell RNA-seq reference coloured by lifecycle stage, showing the cycle from Rings, Trophozoites, Schizonts, to Rings again.

#| fig.alt: >
#|   UMAP of Dogga et al. single-cell RNA-seq reference coloured by pseudotime,
#|   starting with Rings, and ending with Schizonts.
plotUMAP(MCA_PF_SCE, colour_by = "slingPseudotime_1")

UMAP of Dogga et al. single-cell RNA-seq reference coloured by lifecycle stage, showing the cycle from Rings, Trophozoites, Schizonts, to Rings again.

0.2 Prepare BLASE

Now we’ll prepare BLASE for use.

0.2.1 Create BLASE data object

First, we create the object, giving it the number of bins we want to use, and how to calculate them.

blase_data <- as.BlaseData(
    MCA_PF_SCE,
    pseudotime_slot = "slingPseudotime_1",
    n_bins = 48,
    split_by = "cells"
)

# Add these bins to the sc metadata
MCA_PF_SCE <- assign_pseudotime_bins(MCA_PF_SCE,
    pseudotime_slot = "slingPseudotime_1",
    n_bins = 48,
    split_by = "cells"
)

0.2.2 Select Genes

Now we will select the genes we want to use, using BLASE’s gene peakedness metric.

gene_peakedness_info <- calculate_gene_peakedness(
    MCA_PF_SCE,
    window_pct = 5,
    knots = 18,
    BPPARAM = bpparam
)

genes_to_use <- gene_peakedness_spread_selection(
    MCA_PF_SCE,
    gene_peakedness_info,
    genes_per_bin = 30,
    n_gene_bins = 30
)

head(gene_peakedness_info)
#>             gene peak_pseudotime mean_in_window mean_out_window     ratio
#> 28 PF3D7-1401100       3.8311312       5.793085        1.528934  3.788969
#> 44 PF3D7-1401200       6.0203490       4.791942        1.921439  2.493934
#> 29 PF3D7-1401400       3.9679573     696.174509       39.689901 17.540344
#> 84 PF3D7-1401600      11.4933936      93.076303        7.631160 12.196875
#> 1  PF3D7-1401900       0.1368261      43.270750        3.321464 13.027613
#> 80 PF3D7-1402200      10.9460892       3.754015        1.554102  2.415553
#>    window_start window_end deviance_explained
#> 28    3.4890659  4.1731965          0.1881312
#> 44    5.6782838  6.3624143          0.2035355
#> 29    3.6258920  4.3100226          0.2513795
#> 84   11.1513283 11.8354589          0.6891177
#> 1    -0.2052392  0.4788914          0.3846938
#> 80   10.6040239 11.2881544          0.1768597

By using the gene_peakedness_spread_selection function, we can ensure that genes with high ratios are selected from throughout the trajectory.

ggplot(gene_peakedness_info, aes(x = peak_pseudotime, y = ratio)) +
    geom_point() +
    ggtitle("All genes")

Scatter plot showing gene peakedness ratios for all genes, ordered by pseudotime.


gene_peakedness_selected_genes <- gene_peakedness_info[
    gene_peakedness_info$gene %in% genes_to_use,
]

ggplot(gene_peakedness_selected_genes, aes(x = peak_pseudotime, y = ratio)) +
    geom_point() +
    ggtitle("Selected genes")

Scatter plot showing gene peakedness ratios for genes selected by BLASE spread selection, ordered by pseudotime.

Here, we add them to the BLASE object for mapping with.

genes(blase_data) <- genes_to_use

0.3 Calculate Mappings

Now we can perform the actual mapping step, and review the results.

mapping_results <- map_all_best_bins(
    blase_data,
    painter_microarray,
    BPPARAM = bpparam
)

plot_mapping_result_heatmap(mapping_results)

Heatmap of mapping correlations of the Painter et al. data onto the Dogga et al. scRNA-seq.

0.4 Transfer Mappings

In the Painter et al. paper, the expected lifecycle stages are as follows:

Lifecycle Stage	Painter et al. HPI
Ring	0-21
Trophozoite	16-32
Schizont	33-48

We can use this to transfer back to the scRNA-seq as follows:

plotUMAP(MCA_PF_SCE, colour_by = "pseudotime_bin")

A UMAP of the reference scRNA-seq data coloured by pseudotime bin, as calculated by BLASE.

annotated_sce <- annotate_sce(MCA_PF_SCE, mapping_results, include_stats = TRUE)
annotated_sce$BLASE_Annotation <- gsub(" hpi", "", annotated_sce$BLASE_Annotation)
annotated_sce$BLASE_Annotation <- as.numeric(annotated_sce$BLASE_Annotation)

annotation_plot <- plotUMAP(annotated_sce, colour_by = "BLASE_Annotation")
corr_plot <- plotUMAP(annotated_sce, colour_by = "BLASE_Annotation_Correlation") +
    labs(color = "Correlation")

annotation_plot + corr_plot

UMAP of Dogga et al. single-cell RNA-seq reference coloured by best matching bulk RNA-seq sample (hpi). Beside it is a UMAP of the correlations of these mappings.

annotated_sce$BLASE_Annotation_transfer <- ""
for (best_bulk in unique(annotated_sce$BLASE_Annotation)) {
    mask <- colData(annotated_sce)[, "BLASE_Annotation"] == best_bulk

    if (best_bulk %in% 0:15) {
        annotated_sce[, mask]$BLASE_Annotation_transfer <- "Ring"
    } else if (best_bulk %in% 16:21) {
        annotated_sce[, mask]$BLASE_Annotation_transfer <- "Ring/Trophozoite"
    } else if (best_bulk %in% 22:32) {
        annotated_sce[, mask]$BLASE_Annotation_transfer <- "Trophozoite"
    } else if (best_bulk %in% 33:48) {
        annotated_sce[, mask]$BLASE_Annotation_transfer <- "Schizont"
    }
}

plotUMAP(annotated_sce, colour_by = "BLASE_Annotation_transfer") +
    plotUMAP(annotated_sce, colour_by = "STAGE_LR")

0.5 Session Info

sessionInfo()
#> R Under development (unstable) (2025-10-20 r88955)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
#> 
#> Random number generation:
#>  RNG:     Mersenne-Twister 
#>  Normal:  Inversion 
#>  Sample:  Rounding 
#>  
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] patchwork_1.3.2             blase_1.1.0                
#>  [3] BiocParallel_1.45.0         scater_1.39.0              
#>  [5] ggplot2_4.0.0               scuttle_1.21.0             
#>  [7] SingleCellExperiment_1.33.0 SummarizedExperiment_1.41.0
#>  [9] Biobase_2.71.0              GenomicRanges_1.63.0       
#> [11] Seqinfo_1.1.0               IRanges_2.45.0             
#> [13] S4Vectors_0.49.0            BiocGenerics_0.57.0        
#> [15] generics_0.1.4              MatrixGenerics_1.23.0      
#> [17] matrixStats_1.5.0           BiocStyle_2.39.0           
#> 
#> loaded via a namespace (and not attached):
#>   [1] RColorBrewer_1.1-3       jsonlite_2.0.0           magrittr_2.0.4          
#>   [4] magick_2.9.0             spatstat.utils_3.2-0     ggbeeswarm_0.7.2        
#>   [7] farver_2.1.2             rmarkdown_2.30           vctrs_0.6.5             
#>  [10] ROCR_1.0-11              spatstat.explore_3.5-3   tinytex_0.57            
#>  [13] htmltools_0.5.8.1        S4Arrays_1.11.0          BiocNeighbors_2.5.0     
#>  [16] SparseArray_1.11.1       sass_0.4.10              sctransform_0.4.2       
#>  [19] parallelly_1.45.1        KernSmooth_2.23-26       bslib_0.9.0             
#>  [22] htmlwidgets_1.6.4        ica_1.0-3                plyr_1.8.9              
#>  [25] plotly_4.11.0            zoo_1.8-14               cachem_1.1.0            
#>  [28] igraph_2.2.1             mime_0.13                lifecycle_1.0.4         
#>  [31] pkgconfig_2.0.3          rsvd_1.0.5               Matrix_1.7-4            
#>  [34] R6_2.6.1                 fastmap_1.2.0            fitdistrplus_1.2-4      
#>  [37] future_1.67.0            shiny_1.11.1             digest_0.6.37           
#>  [40] tensor_1.5.1             Seurat_5.3.1             RSpectra_0.16-2         
#>  [43] irlba_2.3.5.1            beachmat_2.27.0          labeling_0.4.3          
#>  [46] progressr_0.17.0         spatstat.sparse_3.1-0    mgcv_1.9-3              
#>  [49] polyclip_1.10-7          httr_1.4.7               abind_1.4-8             
#>  [52] compiler_4.6.0           withr_3.0.2              S7_0.2.0                
#>  [55] viridis_0.6.5            fastDummies_1.7.5        MASS_7.3-65             
#>  [58] DelayedArray_0.37.0      tools_4.6.0              vipor_0.4.7             
#>  [61] lmtest_0.9-40            otel_0.2.0               beeswarm_0.4.0          
#>  [64] httpuv_1.6.16            future.apply_1.20.0      goftest_1.2-3           
#>  [67] glue_1.8.0               nlme_3.1-168             promises_1.5.0          
#>  [70] grid_4.6.0               Rtsne_0.17               reshape2_1.4.4          
#>  [73] cluster_2.1.8.1          spatstat.data_3.1-9      gtable_0.3.6            
#>  [76] tidyr_1.3.1              data.table_1.17.8        BiocSingular_1.27.0     
#>  [79] ScaledMatrix_1.19.0      sp_2.2-0                 XVector_0.51.0          
#>  [82] spatstat.geom_3.6-0      RcppAnnoy_0.0.22         stringr_1.5.2           
#>  [85] ggrepel_0.9.6            RANN_2.6.2               pillar_1.11.1           
#>  [88] spam_2.11-1              RcppHNSW_0.6.0           later_1.4.4             
#>  [91] splines_4.6.0            dplyr_1.1.4              lattice_0.22-7          
#>  [94] deldir_2.0-4             survival_3.8-3           tidyselect_1.2.1        
#>  [97] miniUI_0.1.2             pbapply_1.7-4            knitr_1.50              
#> [100] gridExtra_2.3            bookdown_0.45            scattermore_1.2         
#> [103] xfun_0.54                stringi_1.8.7            boot_1.3-32             
#> [106] lazyeval_0.2.2           yaml_2.3.10              evaluate_1.0.5          
#> [109] codetools_0.2-20         tibble_3.3.0             BiocManager_1.30.26     
#> [112] cli_3.6.5                uwot_0.2.3               xtable_1.8-4            
#> [115] reticulate_1.44.0        jquerylib_0.1.4          dichromat_2.0-0.1       
#> [118] Rcpp_1.1.0               spatstat.random_3.4-2    globals_0.18.0          
#> [121] png_0.1-8                spatstat.univar_3.1-4    parallel_4.6.0          
#> [124] dotCall64_1.2            sparseMatrixStats_1.23.0 listenv_0.10.0          
#> [127] viridisLite_0.4.2        scales_1.4.0             ggridges_0.5.7          
#> [130] SeuratObject_5.2.0       purrr_1.1.0              rlang_1.1.6             
#> [133] cowplot_1.2.0