Add wrapper functions for scanpy and Seurat

269e0876 · Jeffrey Pullin · 6425ab67 · 269e0876 · 269e0876
Commit 269e0876 authored 4 years ago by Jeffrey Pullin
--- a/code/scanpy.R
+++ b/code/scanpy.R
+#' Build a Python AnnData object from a SingleCellExperiment.
+#'
+#' The AnnData object is created through the scanpy python module: the
+#' transposed logcounts become `X`, the column data become `obs` and the
+#' row data become `var`.
+#'
+#' @param sce A SingleCellExperiment object.
+#'
+#' @return The object produced by scanpy's `AnnData()` constructor.
+#'
+as_anndata <- function(sce) {
+
+  stopifnot(is(sce, "SingleCellExperiment"))
+
+  # The scanpy python module is loaded on every call.
+  # FIXME: Is there a way to avoid doing this every time?
+  scanpy <- import("scanpy")
+
+  # Local copy whose assays may be re-typed below.
+  prepared <- sce
+
+  # {reticulate} cannot handle DelayedArrays, so coerce the counts.
+  counts_are_delayed <- is(counts(prepared), "DelayedArray")
+  if (counts_are_delayed) {
+    counts(prepared) <- as(counts(prepared), "dgCMatrix")
+  }
+
+  # The logcounts are not sparse, so they are coerced to "Matrix" instead.
+  logcounts_are_delayed <- is(logcounts(prepared), "DelayedArray")
+  if (logcounts_are_delayed) {
+    logcounts(prepared) <- as(logcounts(prepared), "Matrix")
+  }
+
+  # Taken from: https://theislab.github.io/scanpy-in-R/#content
+  # TODO Could this also convert the reduced dims slots?
+  expression <- t(logcounts(prepared))
+  col_annotations <- as.data.frame(colData(prepared))
+  row_annotations <- as.data.frame(rowData(prepared))
+
+  scanpy$AnnData(
+    X   = expression,
+    obs = col_annotations,
+    var = row_annotations
+  )
+}
+
+#' Find marker genes using scanpy and return the results to R conveniently
+#'
+#' Runs scanpy's `tl.rank_genes_groups()` on `ann_data` and reshapes the
+#' `names`, `scores`, `pvals`, `pvals_adj` and `logfoldchanges` entries of
+#' the stored result into long format.
+#'
+#' @param ann_data A Python AnnData object (class
+#'   `anndata._core.anndata.AnnData`). It is modified in place by scanpy,
+#'   which stores the test results in its `uns` slot.
+#' @param groupby Name of the `obs` column that defines the groups.
+#' @param method The test to use, passed to scanpy as `method`.
+#' @param ... Further named arguments passed on to
+#'   `tl.rank_genes_groups()`.
+#'
+#' @return The column-wise binding (`bind_cols()`) of five long-format
+#'   tables, one per result field. Each table has a `cluster` column and a
+#'   value column (`gene`, `score`, `pval`, `pval_adj`, `logfoldchange`).
+#'
+scanpy_rank_genes_groups <- function(ann_data, groupby = "label", method, ...) {
+
+  stopifnot(inherits(ann_data, "anndata._core.anndata.AnnData"))
+
+  # Load the python modules locally instead of relying on a global `sc` and
+  # on `pd` having been imported into python's main module beforehand.
+  sc <- reticulate::import("scanpy")
+  # convert = FALSE keeps the pandas results on the python side until they
+  # are explicitly converted below.
+  pd <- reticulate::import("pandas", convert = FALSE)
+
+  # `groupby` is honoured (it used to be hard-coded to "label") and `method`
+  # is passed by name: the third positional parameter of
+  # rank_genes_groups() is not `method`.
+  sc$tl$rank_genes_groups(ann_data, groupby = groupby, method = method, ...)
+
+  # scanpy stores the results under `key_added` when it is supplied and
+  # under "rank_genes_groups" otherwise.
+  key_added <- list(...)[["key_added"]]
+  results_key <- if (is.null(key_added)) "rank_genes_groups" else key_added
+
+  # Read the results from the object that was passed in, without converting
+  # them to R yet, rather than from a global variable.
+  results <- reticulate::py_get_item(
+    reticulate::py_get_attr(ann_data, "uns"),
+    results_key
+  )
+
+  # Turn one result field into a pandas DataFrame, convert it to R and
+  # reshape it so that the former column names end up in `cluster`.
+  extract_field <- function(field, value_name) {
+    wide <- reticulate::py_to_r(
+      pd$DataFrame(reticulate::py_get_item(results, field))
+    )
+    tidyr::pivot_longer(
+      wide, cols = everything(),
+      names_to = "cluster", values_to = value_name
+    )
+  }
+
+  fields <- c("names", "scores", "pvals", "pvals_adj", "logfoldchanges")
+  value_names <- c("gene", "score", "pval", "pval_adj", "logfoldchange")
+
+  # unname() so that bind_cols() receives plain, unnamed data frames, as it
+  # did before. Every piece has its own `cluster` column; the duplicates are
+  # left to bind_cols() to resolve, which keeps the returned shape unchanged.
+  out <- unname(Map(extract_field, fields, value_names))
+
+  bind_cols(out)
+
+}
+
--- a/code/seurat.R
+++ b/code/seurat.R
+#' Turn a SingleCellExperiment object into a Seurat object
+#'
+#' Thin wrapper around `Seurat::as.Seurat()` that first makes sure the
+#' object has column names and that its counts and logcounts are not
+#' DelayedArrays, and afterwards sets the identities of the result to the
+#' column labels of the input.
+#'
+#' @param sce A SingleCellExperiment object
+#'
+#' @return The Seurat object created by `Seurat::as.Seurat()`.
+#'
+as_seurat_data <- function(sce) {
+
+  stopifnot(is(sce, "SingleCellExperiment"))
+
+  # Local copy that is adjusted before the conversion.
+  prepared <- sce
+
+  # Seurat needs column names; generate "cell_<index>" ones when absent.
+  has_no_colnames <- is.null(colnames(prepared))
+  if (has_no_colnames) {
+    cell_index <- seq_len(ncol(prepared))
+    colnames(prepared) <- paste0("cell_", cell_index)
+  }
+
+  # Seurat does not support DelayedArrays, so coerce the counts.
+  counts_are_delayed <- is(counts(prepared), "DelayedArray")
+  if (counts_are_delayed) {
+    counts(prepared) <- as(counts(prepared), "dgCMatrix")
+  }
+
+  # The logcounts are not sparse, so they are coerced to "Matrix" instead.
+  logcounts_are_delayed <- is(logcounts(prepared), "DelayedArray")
+  if (logcounts_are_delayed) {
+    logcounts(prepared) <- as(logcounts(prepared), "Matrix")
+  }
+
+  seurat_obj <- Seurat::as.Seurat(prepared)
+
+  # FIXME Is this always necessary?
+  # Use the cluster ids as the identities of the Seurat object.
+  Idents(seurat_obj) <- colLabels(prepared)
+
+  seurat_obj
+}