#' Compare SCESet objects #' #' Combine the data from several SCESet objects and produce some basic plots #' comparing. #' #' @param sces named list of SCESet objects to combine and compare. #' #' @details #' The return list has three items: #' #' \describe{ #' \item{\code{FeatureData}}{Combined feature data from the provided SCESets.} #' \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.} #' \item{\code{Plots}}{Comparison plots #' \describe{ #' \item{\code{Means}}{Violin plot of mean distribution.} #' \item{\code{Variances}}{Violin plot of variance distribution.} #' \item{\code{MeanVar}}{Scatter plot with fitted lines showing the #' mean-variance relationship.} #' \item{\code{LibraySizes}}{Boxplot of the library size distribution.} #' \item{\code{ZerosGene}}{Boxplot of the percentage of each gene that is #' zero.} #' \item{\code{ZerosCell}}{Boxplot of the percentage of each cell that is #' zero.} #' } #' } #' } #' #' The plots returned by this function are created using #' \code{\link[ggplot2]{ggplot}} and are only a sample of the kind of plots you #' might like to consider. The data used to create these plots is also returned #' and should be in the correct format to allow you to create further plots #' using \code{\link[ggplot2]{ggplot}}. #' #' @return List containing the combined datasets and plots. #' @examples #' sim1 <- splatSimulate(nGenes = 1000, nCells = 20) #' sim2 <- simpleSimulate(nGenes = 1000, nCells = 20) #' comparison <- compareSCESets(list(Splat = sim1, Simple = sim2)) #' names(comparison) #' names(comparison$Plots) #' @importFrom ggplot2 ggplot aes geom_point geom_smooth geom_boxplot #' geom_violin scale_y_continuous scale_y_log10 xlab ylab ggtitle theme_minimal #' @export compareSCESets <- function(sces) { checkmate::assertList(sces, types = "SCESet", any.missing = FALSE, min.len = 1, names = "unique") for (name in names(sces)) { sce <- sces[[name]] fData(sce)$Dataset <- name pData(sce)$Dataset <- name sce <- scater::calculateQCMetrics(sce) cpm(sce) <- edgeR::cpm(counts(sce)) sce <- addFeatureStats(sce, "counts") sce <- addFeatureStats(sce, "cpm") sce <- addFeatureStats(sce, "cpm", log = TRUE) sces[[name]] <- sce } fData.all <- fData(sces[[1]]) fData.all$Dataset <- names(sces)[[1]] pData.all <- pData(sces[[1]]) pData.all$Dataset <- names(sces)[[1]] if (length(sces) > 1) { for (name in names(sces)[-1]) { fData.all <- rbindMatched(fData.all, fData(sce)) pData.all <- rbindMatched(pData.all, pData(sce)) } } means <- ggplot(fData.all, aes(x = Dataset, y = mean_log_cpm, colour = Dataset)) + geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + ylab(expression(paste("Mean ", log[2], "(CPM + 1)"))) + ggtitle("Distribution of mean expression") + theme_minimal() vars <- ggplot(fData.all, aes(x = Dataset, y = var_cpm, colour = Dataset)) + geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + scale_y_log10(labels = scales::comma) + ylab("CPM Variance") + ggtitle("Distribution of variance") + theme_minimal() mean.var <- ggplot(fData.all, aes(x = mean_log_cpm, y = var_log_cpm, colour = Dataset, fill = Dataset)) + geom_point() + geom_smooth() + xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) + ylab(expression(paste("Variance ", log[2], "(CPM + 1)"))) + ggtitle("Mean-variance relationship") + theme_minimal() libs <- ggplot(pData.all, aes(x = Dataset, y = total_counts, colour = Dataset)) + geom_boxplot() + scale_y_continuous(labels = scales::comma) + ylab("Total counts per cell") + ggtitle("Distribution of library sizes") + theme_minimal() z.gene <- ggplot(fData.all, aes(x = Dataset, y = pct_dropout, colour = Dataset)) + geom_boxplot() + scale_y_continuous(limits = c(0, 100)) + ylab("Percentage zeros per gene") + ggtitle("Distribution of zeros per gene") + theme_minimal() z.cell <- ggplot(pData.all, aes(x = Dataset, y = pct_dropout, colour = Dataset)) + geom_boxplot() + scale_y_continuous(limits = c(0, 100)) + ylab("Percentage zeros per cell") + ggtitle("Distribution of zeros per cell") + theme_minimal() comparison <- list(FeatureData = fData.all, PhenoData = pData.all, Plots = list(Means = means, Variances = vars, MeanVar = mean.var, LibrarySizes = libs, ZerosGene = z.gene, ZerosCell = z.cell)) return(comparison) }