Update analysis

329fa571 · Jeffrey Pullin · 05efe431 · 329fa571
Commit 329fa571 authored 4 years ago by Jeffrey Pullin
--- a/analysis/method-concordance.Rmd
+++ b/analysis/method-concordance.Rmd
@@ -15,12 +15,16 @@ The aim of this analysis is to establish how concordant the various methods are

 ### Analysis

-```{r libraries}
+```{r libraries, warning = FALSE, message = FALSE}
 library(scran)
 library(scater)
 library(Seurat)
 library(DropletUtils)
-library(reticulate)
+library(ggupset)
+library(tibble)
+
+source(here::here("code", "scanpy.R"))
+source(here::here("code", "seurat.R"))
 ```

 First, we load the `pbmc4k` dataset. This dataset is used currently because it is small and easy to obtain. 
@@ -33,35 +37,22 @@ pbmcs
 To perform marker gene analysis we need to have clustered data. To cluster the data we use the following workflow, adapted from the Orchestrating single cell analysis with Bioconductor book:

 ```{r prepare-data}
-sce.pbmc <- pbmcs
-
-#--- normalization ---#
-set.seed(1000)
-clusters <- quickCluster(sce.pbmc)
-sce.pbmc <- computeSumFactors(sce.pbmc, clusters = clusters)
-sce.pbmc <- logNormCounts(sce.pbmc)
-
-#--- variance-modelling ---#
-set.seed(1001)
-dec.pbmc <- modelGeneVarByPoisson(sce.pbmc)
-top.pbmc <- getTopHVGs(dec.pbmc, prop = 0.1)
+sce_pbmc <- pbmcs

-#--- dimensionality-reduction ---#
-set.seed(10000)
-sce.pbmc <- denoisePCA(sce.pbmc, subset.row = top.pbmc, technical = dec.pbmc)
+clusters <- quickCluster(sce_pbmc)
+sce_pbmc <- computeSumFactors(sce_pbmc, clusters = clusters)
+sce_pbmc <- logNormCounts(sce_pbmc)

-set.seed(100000)
-sce.pbmc <- runTSNE(sce.pbmc, dimred = "PCA")
+dec_pbmc <- modelGeneVarByPoisson(sce_pbmc)
+top_pbmc <- getTopHVGs(dec_pbmc, prop = 0.1)

-set.seed(1000000)
-sce.pbmc <- runUMAP(sce.pbmc, dimred = "PCA")
+sce_pbmc <- denoisePCA(sce_pbmc, subset.row = top_pbmc, technical = dec_pbmc)

-#--- clustering ---#
-g <- buildSNNGraph(sce.pbmc, k = 10, use.dimred = 'PCA')
+g <- buildSNNGraph(sce_pbmc, k = 10, use.dimred = "PCA")
 clust <- igraph::cluster_walktrap(g)$membership
-colLabels(sce.pbmc) <- factor(clust)
+colLabels(sce_pbmc) <- factor(clust)

-pbmcs <- sce.pbmc
+pbmcs <- sce_pbmc
 ```

 We can now run the marker gene methods. We will focus our attention on cluster 9. Note that this cluster is focused on in the Marker Gene chapter of OCSA.  
@@ -69,7 +60,6 @@ We can now run the marker gene methods. We will focus our attention on cluster 9
 First we run {scran}:

 ```{r mg-scran}
-# A bit slow on my MacBook Air. 
 scran_mgs <- findMarkers(pbmcs)

 # Focus on the marker genes for cluster 9.
@@ -80,14 +70,7 @@ scran_mgs_9
 Next we can run {Seurat}:

 ```{r mg-seurat}
-# Seurat requires column names and cannot handle DelayedArray objects. 
-pbmcs2 <- pbmcs
-colnames(pbmcs2) <- paste0("cell_", 1:4340)
-counts(pbmcs2) <- as(counts(pbmcs2), "dgCMatrix")
-logcounts(pbmcs2) <- as(logcounts(pbmcs2), "Matrix")
-
-seurat_data <- as.Seurat(pbmcs2)
-Idents(seurat_data) <- colLabels(pbmcs2)
+seurat_data <- as_seurat_data(pbmcs)
 seurat_mgs_9 <- FindMarkers(seurat_data, ident.1 = 9)
 ```

@@ -102,7 +85,29 @@ scran_top_genes <- row.names(scran_top)
 seurat_top_genes <- row.names(seurat_top)

 length(intersect(scran_top_genes, seurat_top_genes))
+```
+
+```{r upset-plot}
+genes <- union(scran_top_genes, seurat_top_genes)
+method <- list()
+for (gene in genes) {
+  to_add <- c()
+  if (gene %in% scran_top_genes) {
+    to_add <- c(to_add, "scran")
+  } 
+  if (gene %in% seurat_top_genes) {
+    to_add <- c(to_add, "seurat")
+  } 
+  method <- c(method, list(to_add))
+}
+
+upset_data <- tibble(genes, method)
+ggplot(upset_data, aes(x = method)) +
+    geom_bar() +
+    scale_x_upset()
+```

+```{r, eval = FALSE}
 # 24 marker genes
 scran_top <- scran_mgs_9[scran_mgs_9$Top <= 2,][, 1:4]
 seurat_top <- seurat_mgs_9[1:24, ]
@@ -112,6 +117,9 @@ seurat_top_genes <- row.names(seurat_top)

 length(intersect(scran_top_genes, seurat_top_genes))

+
+
+
 # 14 marker genes
 scran_top <- scran_mgs_9[scran_mgs_9$Top <= 1,][, 1:4]
 seurat_top <- seurat_mgs_9[1:14, ]
@@ -135,7 +143,7 @@ length(intersect(scran_top_genes, seurat_top_genes))

 What about {scanpy}?? 

-```{r scanpy}
+```{r scanpy, eval = FALSE}
 sc <- import("scanpy")
 pd <- import("pandas")

@@ -159,5 +167,4 @@ seurat_mgs_9_genes <- row.names(seurat_mgs_9)

 n <- 50
 length(intersect(scanpy_mgs_9_genes[1:n], seurat_mgs_9_genes[1:n]))
-
 ```