Skip to content
Snippets Groups Projects
Commit 269e0876 authored by Jeffrey Pullin
Browse files

Add wrapper functions for scanpy and Seurat

parent 6425ab67
No related branches found
No related tags found
No related merge requests found
#' Build a Python AnnData object (held in R through reticulate) from a
#' SingleCellExperiment.
#'
#' The transposed `logcounts` assay is passed as `X`, the column data as `obs`
#' and the row data as `var`.
#'
#' @param sce A SingleCellExperiment object.
#'
#' @return The object produced by scanpy's `AnnData()` constructor.
#'
as_anndata <- function(sce){
  stopifnot(is(sce, "SingleCellExperiment"))

  # The scanpy python module is imported afresh on every call.
  # FIXME: Is there a way to avoid doing this every time?
  scanpy <- import("scanpy")

  # Work on a copy so that the assay replacements below stay local.
  prepared <- sce

  # {reticulate} cannot pass DelayedArrays to Python, so DelayedArray-backed
  # counts are coerced to a dgCMatrix first.
  counts_are_delayed <- is(counts(prepared), "DelayedArray")
  if (counts_are_delayed) {
    counts(prepared) <- as(counts(prepared), "dgCMatrix")
  }

  # DelayedArray-backed logcounts are coerced with as(, "Matrix"); they are
  # not expected to be sparse.
  logcounts_are_delayed <- is(logcounts(prepared), "DelayedArray")
  if (logcounts_are_delayed) {
    logcounts(prepared) <- as(logcounts(prepared), "Matrix")
  }

  # Recipe taken from: https://theislab.github.io/scanpy-in-R/#content
  # TODO Could this also convert the reduced dims slots?
  expression <- t(logcounts(prepared))
  cell_table <- as.data.frame(colData(prepared))
  gene_table <- as.data.frame(rowData(prepared))

  scanpy$AnnData(
    X = expression,
    obs = cell_table,
    var = gene_table
  )
}
#' Find marker genes using scanpy and return the results to R conveniently
#'
#' Runs scanpy's `tl.rank_genes_groups()` on `ann_data` and gathers the
#' per-group result tables that scanpy stores under
#' `uns['rank_genes_groups']` into one data frame.
#'
#' @param ann_data An AnnData object (class `anndata._core.anndata.AnnData`),
#'   for example one created by `as_anndata()`. scanpy writes its results
#'   into this object.
#' @param groupby Grouping key forwarded to scanpy as its `groupby` argument.
#'   Defaults to `"label"`.
#' @param method Forwarded to scanpy as the `method` keyword argument.
#' @param ... Further keyword arguments forwarded to scanpy's
#'   `rank_genes_groups()`. Results are always read back from the default
#'   `rank_genes_groups` key of `ann_data$uns`.
#'
#' @return The result of `dplyr::bind_cols()` applied to one long-format
#'   table per available result field. Each table has a `cluster` column and
#'   one value column (`gene`, `score`, `pval`, `pval_adj`, `logfoldchange`).
#'   Because every table carries its own `cluster` column, `bind_cols()`
#'   repairs the duplicated names (e.g. `cluster...1`).
#'
scanpy_rank_genes_groups <- function(ann_data, groupby = "label", method, ...) {
  stopifnot(inherits(ann_data, "anndata._core.anndata.AnnData"))

  # Import scanpy here instead of relying on a global `sc` happening to exist.
  sc <- reticulate::import("scanpy")

  # `method` has to be passed by name: it is not the third positional
  # parameter of scanpy's rank_genes_groups().
  sc$tl$rank_genes_groups(ann_data, groupby, method = method, ...)

  # A small Python helper turns one result field into a pandas DataFrame
  # (one column per group). It takes the AnnData object as an argument, so
  # nothing depends on the name of a variable in the R global environment,
  # and it imports pandas itself. Fields that scanpy did not produce for the
  # chosen method come back as None (NULL in R).
  reticulate::py_run_string(paste(
    "def rank_genes_groups_field(adata, field):",
    "    import pandas as pd",
    "    result = adata.uns['rank_genes_groups']",
    "    if field not in result:",
    "        return None",
    "    return pd.DataFrame(result[field])",
    sep = "\n"
  ))
  field_as_frame <- reticulate::py$rank_genes_groups_field

  # scanpy result field -> name of the value column in the output.
  value_columns <- c(
    names = "gene",
    scores = "score",
    pvals = "pval",
    pvals_adj = "pval_adj",
    logfoldchanges = "logfoldchange"
  )

  long_tables <- lapply(names(value_columns), function(field) {
    wide <- field_as_frame(ann_data, field)
    if (is.null(wide)) {
      return(NULL)
    }
    tidyr::pivot_longer(
      wide,
      cols = dplyr::everything(),
      names_to = "cluster",
      values_to = value_columns[[field]]
    )
  })

  # Every table is pivoted from the same group-by-rank layout, so the rows
  # line up and the tables can be bound side by side.
  dplyr::bind_cols(Filter(Negate(is.null), long_tables))
}
#' Turn a SingleCellExperiment object into a Seurat data object
#'
#' A thin wrapper around `Seurat::as.Seurat()`. Before converting, it fills in
#' missing column names and coerces DelayedArray-backed `counts` / `logcounts`
#' assays; after converting, it sets the identities of the result from the
#' column labels of the input.
#'
#' @param sce A SingleCellExperiment object
#'
#' @return The Seurat object produced by `Seurat::as.Seurat()`, with its
#'   identities set to `colLabels()` of the prepared input.
#'
as_seurat_data <- function(sce) {
  stopifnot(is(sce, "SingleCellExperiment"))

  # Work on a copy so that the replacements below stay local.
  prepared <- sce

  # Seurat needs column names; generate "cell_<i>" names when there are none.
  lacks_colnames <- is.null(colnames(prepared))
  if (lacks_colnames) {
    cell_index <- seq_len(ncol(prepared))
    colnames(prepared) <- paste0("cell_", cell_index)
  }

  # Seurat does not support DelayedArrays, so DelayedArray-backed counts are
  # coerced to a dgCMatrix.
  counts_are_delayed <- is(counts(prepared), "DelayedArray")
  if (counts_are_delayed) {
    counts(prepared) <- as(counts(prepared), "dgCMatrix")
  }

  # DelayedArray-backed logcounts are coerced with as(, "Matrix"); they are
  # not expected to be sparse.
  logcounts_are_delayed <- is(logcounts(prepared), "DelayedArray")
  if (logcounts_are_delayed) {
    logcounts(prepared) <- as(logcounts(prepared), "Matrix")
  }

  seurat_obj <- Seurat::as.Seurat(prepared)

  # FIXME Is this always necessary?
  # Carry the cluster ids over as the identities of the Seurat object.
  cluster_ids <- colLabels(prepared)
  Idents(seurat_obj) <- cluster_ids

  seurat_obj
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment