From a119373472de3ee0e6ab0b4262a6fce293b071e5 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Thu, 6 Jun 2019 11:21:30 +1000 Subject: [PATCH] Add variable gene correlation plot to compareSCEs --- DESCRIPTION | 4 ++-- NAMESPACE | 8 +++++++ R/compare.R | 39 +++++++++++++++++++++++++++++++---- tests/testthat/test-compare.R | 4 ++-- 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f88d125..69b1543 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,8 @@ Imports: stats, SummarizedExperiment, utils, - crayon + crayon, + S4Vectors Suggests: BiocStyle, covr, @@ -53,7 +54,6 @@ Suggests: pscl, testthat, rmarkdown, - S4Vectors, scDD, scran, mfa, diff --git a/NAMESPACE b/NAMESPACE index d9df705..c9b65bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -78,8 +78,11 @@ exportClasses(SplatParams) exportClasses(ZINBParams) importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) +importFrom(S4Vectors,"metadata<-") +importFrom(S4Vectors,metadata) importFrom(SingleCellExperiment,"cpm<-") importFrom(SingleCellExperiment,SingleCellExperiment) +importFrom(SingleCellExperiment,cpm) importFrom(SummarizedExperiment,"assays<-") importFrom(SummarizedExperiment,"colData<-") importFrom(SummarizedExperiment,"rowData<-") @@ -96,15 +99,19 @@ importFrom(checkmate,checkIntegerish) importFrom(checkmate,checkNumber) importFrom(checkmate,checkNumeric) importFrom(ggplot2,aes_string) +importFrom(ggplot2,coord_fixed) importFrom(ggplot2,element_blank) +importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_abline) importFrom(ggplot2,geom_boxplot) importFrom(ggplot2,geom_hline) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_smooth) +importFrom(ggplot2,geom_tile) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggtitle) importFrom(ggplot2,scale_colour_manual) +importFrom(ggplot2,scale_fill_distiller) importFrom(ggplot2,scale_fill_manual) importFrom(ggplot2,scale_x_log10) importFrom(ggplot2,scale_y_continuous) @@ -122,6 +129,7 @@ importFrom(methods,slot) importFrom(methods,slotNames) importFrom(methods,validObject) importFrom(stats,aggregate) +importFrom(stats,cor) importFrom(stats,dnbinom) importFrom(stats,ks.test) importFrom(stats,median) diff --git a/R/compare.R b/R/compare.R index a009846..dd556d8 100644 --- a/R/compare.R +++ b/R/compare.R @@ -50,9 +50,12 @@ #' names(comparison) #' names(comparison$Plots) #' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth geom_boxplot -#' scale_y_continuous scale_y_log10 scale_x_log10 xlab ylab ggtitle -#' theme_minimal scale_colour_manual scale_fill_manual -#' @importFrom SingleCellExperiment cpm<- +#' geom_tile scale_y_continuous scale_y_log10 scale_x_log10 scale_colour_manual +#' scale_fill_manual scale_fill_distiller coord_fixed facet_wrap xlab ylab +#' ggtitle theme_minimal +#' @importFrom S4Vectors metadata<- metadata +#' @importFrom SingleCellExperiment cpm<- cpm +#' @importFrom stats cor #' @export compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1, fits = TRUE, colours = NULL) { @@ -81,22 +84,37 @@ compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1, sce <- addFeatureStats(sce, "cpm", log = TRUE) n.features <- colData(sce)$total_features_by_counts colData(sce)$PctZero <- 100 * (1 - n.features / nrow(sce)) + + var.genes <- rev(order(rowData(sce)$VarLogCPM))[1:100] + var.cpm <- log2(cpm(sce)[var.genes, ] + 1) + var.cors <- as.data.frame.table(cor(t(var.cpm), method = "spearman")) + colnames(var.cors) <- c("GeneA", "GeneB", "Correlation") + var.cors$VarGeneA <- rep(paste0("VarGene", 1:100), 100) + var.cors$VarGeneB <- rep(paste0("VarGene", 1:100), each = 100) + var.cors$Dataset <- name + var.cors <- var.cors[, c("Dataset", "GeneA", "GeneB", "VarGeneA", + "VarGeneB", "Correlation")] + metadata(sce)$VarGeneCorrelation <- var.cors + sces[[name]] <- sce } features <- rowData(sces[[1]]) cells <- colData(sces[[1]]) + var.cors <- metadata(sces[[1]])$VarGeneCorrelation if (length(sces) > 1) { for (name in names(sces)[-1]) { sce <- sces[[name]] features <- rbindMatched(features, rowData(sce)) cells <- rbindMatched(cells, colData(sce)) + var.cors <- rbindMatched(var.cors, metadata(sce)$VarGeneCorrelation) } } features$Dataset <- factor(features$Dataset, levels = names(sces)) cells$Dataset <- factor(cells$Dataset, levels = names(sces)) + var.cors$Dataset <- factor(var.cors$Dataset, levels = names(sces)) features <- data.frame(features) cells <- data.frame(cells) @@ -172,6 +190,18 @@ compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1, ggtitle("Mean-zeros relationship") + theme_minimal() + var.correlation <- ggplot(var.cors, + aes_string(x = "VarGeneA", y = "VarGeneB", + fill = "Correlation")) + + geom_tile() + + scale_fill_distiller(palette = "RdBu", limits = c(-1, 1)) + + coord_fixed() + + facet_wrap(~ Dataset) + + ggtitle("Correlation - 100 variable genes") + + theme_minimal() + + theme(axis.title = element_blank(), + axis.text = element_blank()) + if (fits) { mean.var <- mean.var + geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) @@ -187,7 +217,8 @@ compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1, LibrarySizes = libs, ZerosGene = z.gene, ZerosCell = z.cell, - MeanZeros = mean.zeros)) + MeanZeros = mean.zeros, + VarGeneCor = var.correlation)) return(comparison) } diff --git a/tests/testthat/test-compare.R b/tests/testthat/test-compare.R index 29540df..960cbf4 100644 --- a/tests/testthat/test-compare.R +++ b/tests/testthat/test-compare.R @@ -12,9 +12,9 @@ test_that("compareSCEs works", { names(comparison))) checkmate::expect_class(comparison$ColData, "data.frame") checkmate::expect_class(comparison$RowData, "data.frame") - expect_length(comparison$Plots, 7) + expect_length(comparison$Plots, 8) expect_true(all(c("Means", "Variances", "MeanVar", "LibrarySizes", - "ZerosGene", "ZerosCell", "MeanZeros") %in% + "ZerosGene", "ZerosCell", "MeanZeros", "VarGeneCor") %in% names(comparison$Plots))) for (plot in names(comparison$Plots)) { checkmate::expect_class(comparison$Plots[[plot]], "ggplot") -- GitLab