## ----------------------------------------------------------------------------- #| eval: false # install.packages("BiocManager") # BiocManager::install("SpectriPy") ## ----------------------------------------------------------------------------- #| label: libraries #| message: false #' R session: library(Spectra) library(SpectriPy) ## ----------------------------------------------------------------------------- stopifnot(require("MsDataHub")) ## ----------------------------------------------------------------------------- #| label: load-data #| message: false #' R session: #' Loading the data from a mzML file as a `Spectra` object library(MsDataHub) fl <- MsDataHub::PestMix1_DDA.mzML() mzml_r <- Spectra(fl) ## ----------------------------------------------------------------------------- #| label: filter MS level #' R session: #' Restrict to MS level 2 spectra mzml_r <- filterMsLevel(mzml_r, 2) mzml_r <- mzml_r[lengths(mzml_r) >= 3] mzml_r ## ----------------------------------------------------------------------------- #| label: convert spectra to matchms #' R session: #' Convert the R Spectra to a list of Python matchms.Spectrum objects tmp <- rspec_to_pyspec(mzml_r) ## ----------------------------------------------------------------------------- #' R session: #' Class of the converted variable class(tmp) #' First element class(tmp[1]) ## #' Python session: ## ## #' Access the Python data structure stored in the R session ## r.tmp ## ----------------------------------------------------------------------------- #| label: assign variable to Python #' R session: #' Assign the data to a variable in the Python environment py_set_attr(py, "mzml_py", rspec_to_pyspec(mzml_r)) names(py) ## #' Python session: ## ## #' Data type of the variable: ## type(mzml_py) ## ## #' The length of the list: ## len(mzml_py) ## ## #' Data type of the first element: ## type(mzml_py[0]) ## ## #' Intensities of the first spectrum: ## mzml_py[0].peaks.intensities ## #' Python session: ## ## import matchms.filtering as mms_filt ## ## #' Iterate over the Spectrum list and scale the intensities ## for i in range(len(mzml_py)): ## mzml_py[i] = mms_filt.normalize_intensities(mzml_py[i]) ## ## #' Intensities for the first spectrum ## mzml_py[0].peaks.intensities ## ----------------------------------------------------------------------------- #| label: inspect changed intensities in R #' R session: #' Access the intensities of the first spectrum py_get_attr(py, "mzml_py")[0]$peaks$intensities ## #' Python session: ## ## #' Access the `fl` variable from the R session: ## r.fl ## ## type(r.fl) ## #' Python session: ## ## #' Access the `Spectra` object with the original data from R; the data ## #' gets directly translated on-the-fly ## r.mzml_r ## ## #' Access the intensities of the first spectrum ## r.mzml_r[0].peaks.intensities ## ----------------------------------------------------------------------------- #| label: define MGF file #' R session: f_mgf <- system.file("extdata", "mgf", "test.mgf", package = "SpectriPy") ## #' Python session ## ## import matchms ## from matchms.importing import load_from_mgf ## ## mgf_py = list(load_from_mgf(r.f_mgf)) ## mgf_py ## ----------------------------------------------------------------------------- #' R session: #' Access the first spectrum py$mgf_py[[1]] ## ----------------------------------------------------------------------------- #' R session: #' Extract the peaks matrix from the first spectrum py_to_r(py$mgf_py[[1]]$peaks$to_numpy) ## ----------------------------------------------------------------------------- #' R session: #' Convert and copy the data to R mgf_r <- pyspec_to_rspec(py_get_attr(py, "mgf_py")) mgf_r #' Extract intensities mgf_r$intensity ## ----------------------------------------------------------------------------- #| label: initialize MsBackendPy for Python variable #' R session: #' Create a Spectra object with a MsBackendPy backend for the #' attribute "mgf_py" mgf <- Spectra("mgf_py", source = MsBackendPy()) mgf ## ----------------------------------------------------------------------------- #| label: access MS level and intensity through MsBackendPy #' R session: #' Extract MS level msLevel(mgf) #' Extract intensity values intensity(mgf) ## #' Python session: ## ## #' Scale intensities ## for i in range(len(mgf_py)): ## mgf_py[i] = mms_filt.normalize_intensities(mgf_py[i]) ## ## ----------------------------------------------------------------------------- #| label: get intensities after modifying in Python #' R session: #' Get intensities after scaling the intensities in Python: intensity(mgf) ## ----------------------------------------------------------------------------- #' R session: #' List the *default* spectra variable mapping in R and python, respectively defaultSpectraVariableMapping() ## #' Python session: ## ## #' Available metadata for the first spectrum ## mgf_py[0].metadata.keys() ## ----------------------------------------------------------------------------- #' R session: #' Add mapping for additional spectra variables to the default mapping in R and #' python, respectively map <- c(defaultSpectraVariableMapping(), smiles = "smiles", inchi = "inchi", name = "compound_name") map ## ----------------------------------------------------------------------------- #| label: convert R to Python with spectra variables #' R session: #' Convert the Python MS data structures to an R `Spectra` mgf_r <- pyspec_to_rspec(py_get_attr(py, "mgf_py"), mapping = map) spectraVariables(mgf_r) ## ----------------------------------------------------------------------------- #| label: access spectra variables from Python in R #' R session: #' Show the first values for the spectra variable "name" mgf_r$name |> head() #' Show the first values for the spectra variable "inchi" mgf_r$inchi |> head() ## ----------------------------------------------------------------------------- #' R session: #' List available spectra variables spectraVariables(mgf) ## ----------------------------------------------------------------------------- #' R session: #' Get the first entries from the inchi variable mgf$inchi |> head() ## ----------------------------------------------------------------------------- #| label: add additional mapping #' R session: #' Add mapping for additional spectra variables to the `MsBackendPy` m <- defaultSpectraVariableMapping() m["precursorIntensity"] <- "pepmassint" spectraVariableMapping(mgf) <- m #' List available spectra variables spectraVariables(mgf) #' Get spectra data for two variables spectraData(mgf, c("precursorMz", "precursorIntensity")) ## ----------------------------------------------------------------------------- #| label: compareSpectra with mzML and MGF #' R session: #' Create a `Spectra` for the scaled MS data in Python mzml <- Spectra("mzml_py", source = MsBackendPy()) #' Calculate the pairwise similarity between all spectra sim <- compareSpectra(mzml, mgf, tolerance = 0.1) dim(sim) ## #' Python session: ## ## import matchms.similarity as mms_similarity ## ## #' Calculate similarity scores ## scores = matchms.calculate_scores( ## mzml_py, mgf_py, mms_similarity.CosineHungarian(tolerance = 0.1)) ## ## #' Extract similarity scores ## sim = scores.to_array()["CosineHungarian_score"] ## ----------------------------------------------------------------------------- #| label: compare dot product vs Cosine Hungarian #' R session: #' Plot the similarity scores against each other plot(sim, py$sim, pch = 21, col = "#000000ce", bg = "#00000060", xlab = "Dot product", ylab = "Cosine Hungarian") grid() ## ----------------------------------------------------------------------------- #' R session: sessionInfo()