diff --git a/DESCRIPTION b/DESCRIPTION index e6aee3ec4b01f7419dce8ab71d6ec8afb296607d..5b99b77dab69425790e5ae6b839266d44d0a6f68 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 0.99.11 -Date: 2017-03-20 +Version: 0.99.12 +Date: 2017-03-22 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), @@ -51,5 +51,5 @@ biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing, Software URL: https://github.com/Oshlack/splatter BugReports: https://github.com/Oshlack/splatter/issues -RoxygenNote: 6.0.0 +RoxygenNote: 6.0.1 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 4e1ed1b1e5156d9606895f2d4f1f2e5951f0cbc1..c00aebb4c71329718aad6b095bda5f76c2abe6de 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ S3method(splatEstimate,SCESet) S3method(splatEstimate,matrix) export(addGeneLengths) export(compareSCESets) +export(diffSCESets) export(getParam) export(getParams) export(listSims) @@ -57,7 +58,9 @@ importFrom(checkmate,checkLogical) importFrom(checkmate,checkNumber) importFrom(checkmate,checkNumeric) importFrom(ggplot2,aes_string) +importFrom(ggplot2,geom_abline) importFrom(ggplot2,geom_boxplot) +importFrom(ggplot2,geom_hline) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_smooth) importFrom(ggplot2,geom_violin) diff --git a/NEWS.md b/NEWS.md index 4877c31f0e88ed359e670b1d1b5447ad310b8678..308481dee215ae38435b069d9ff3024d2b365467 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# 0.99.12 + +* Add diffSCESets function +* Update compareSCESets plots +* Modify Lun2 nGenes estimate +* Modify how addFeatureStats names columns +* Add infinte bcv.df warning to splatSimulate + # 0.99.11 * Add parallel option to lun2Estimate diff --git a/R/SCESet-functions.R b/R/SCESet-functions.R index 63178257a4053fd8b9580561372e3f4b26048d88..2a92c3545c2aa80652533158bbd89168b65f5735 100644 --- a/R/SCESet-functions.R +++ b/R/SCESet-functions.R @@ -14,8 +14,10 @@ #' @details #' Currently adds the following statistics: mean, variance, coefficient of #' variation, median and median absolute deviation. Statistics are added to -#' the \code{fData} slot and are named \code{stat_[log]_value_[no0]} where -#' \code{log} and \code{no0} are added if those arguments are true. +#' the \code{fData} slot and are named \code{Stat[Log]Value[No0]} where +#' \code{Log} and \code{No0} are added if those arguments are true. +#' UpperCamelCase is used to differentiate these columns from those added by +#' \code{scater}. #' #' @return SCESet with additional feature statistics #' @@ -28,35 +30,37 @@ addFeatureStats <- function(sce, value = c("counts", "cpm", "tpm", "fpkm"), switch(value, counts = { values = scater::counts(sce) + suffix <- "Counts" }, cpm = { values = scater::cpm(sce) + suffix <- "CPM" }, tpm = { values = scater::tpm(sce) + suffix <- "TPM" }, fpkm = { values = scater::fpkm(sce) + suffix <- "FPKM" } ) - suffix <- value - if (no.zeros) { values[values == 0] <- NA - suffix = paste0(suffix, "_no0") + suffix = paste0(suffix, "No0") } if (log) { values = log2(values + offset) - suffix = paste0("log_", suffix) + suffix = paste0("Log", suffix) } - mean.str <- paste0("mean_", suffix) - var.str <- paste0("var_", suffix) - cv.str <- paste0("cv_", suffix) - med.str <- paste0("med_", suffix) - mad.str <- paste0("mad_", suffix) + mean.str <- paste0("Mean", suffix) + var.str <- paste0("Var", suffix) + cv.str <- paste0("CV", suffix) + med.str <- paste0("Med", suffix) + mad.str <- paste0("MAD", suffix) fData(sce)[, mean.str] <- rowMeans(values, na.rm = TRUE) fData(sce)[, var.str] <- matrixStats::rowVars(values, na.rm = TRUE) diff --git a/R/compare.R b/R/compare.R index 76de2ab8eaf18d0a0ebadd2aa3435c64fc40594e..23aa2710ccaf96391156870e6d71c745eb749580 100644 --- a/R/compare.R +++ b/R/compare.R @@ -1,12 +1,12 @@ #' Compare SCESet objects #' #' Combine the data from several SCESet objects and produce some basic plots -#' comparing. +#' comparing them. #' #' @param sces named list of SCESet objects to combine and compare. #' #' @details -#' The return list has three items: +#' The returned list has three items: #' #' \describe{ #' \item{\code{FeatureData}}{Combined feature data from the provided @@ -14,8 +14,8 @@ #' \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.} #' \item{\code{Plots}}{Comparison plots #' \describe{ -#' \item{\code{Means}}{Violin plot of mean distribution.} -#' \item{\code{Variances}}{Violin plot of variance distribution.} +#' \item{\code{Means}}{Boxplot of mean distribution.} +#' \item{\code{Variances}}{Boxplot of variance distribution.} #' \item{\code{MeanVar}}{Scatter plot with fitted lines showing the #' mean-variance relationship.} #' \item{\code{LibraySizes}}{Boxplot of the library size @@ -80,24 +80,26 @@ compareSCESets <- function(sces) { pData.all$Dataset <- factor(pData.all$Dataset, levels = names(sces)) means <- ggplot(fData.all, - aes_string(x = "Dataset", y = "mean_log_cpm", + aes_string(x = "Dataset", y = "MeanLogCPM", colour = "Dataset")) + - geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + + #geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + + geom_boxplot() + ylab(expression(paste("Mean ", log[2], "(CPM + 1)"))) + ggtitle("Distribution of mean expression") + theme_minimal() vars <- ggplot(fData.all, - aes_string(x = "Dataset", y = "var_cpm", + aes_string(x = "Dataset", y = "VarCPM", colour = "Dataset")) + - geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + + #geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + + geom_boxplot() + scale_y_log10(labels = scales::comma) + ylab("CPM Variance") + ggtitle("Distribution of variance") + theme_minimal() mean.var <- ggplot(fData.all, - aes_string(x = "mean_log_cpm", y = "var_log_cpm", + aes_string(x = "MeanLogCPM", y = "VarLogCPM", colour = "Dataset", fill = "Dataset")) + geom_point(size = 0.1, alpha = 0.1) + geom_smooth() + @@ -134,7 +136,7 @@ compareSCESets <- function(sces) { theme_minimal() mean.zeros <- ggplot(fData.all, - aes_string(x = "mean_counts", y = "pct_dropout", + aes_string(x = "MeanCounts", y = "pct_dropout", colour = "Dataset", fill = "Dataset")) + geom_point(size = 0.1, alpha = 0.1) + geom_smooth() + @@ -156,3 +158,289 @@ compareSCESets <- function(sces) { return(comparison) } + +#' Diff SCESet objects +#' +#' Combine the data from several SCESet objects and produce some basic plots +#' comparing them to a reference. +#' +#' @param sces named list of SCESet objects to combine and compare. +#' @param ref string giving the name of the SCESet to use as the reference +#' +#' @details +#' +#' This function aims to look at the differences between a reference SCESet and +#' one or more others. It requires each SCESet to have the same dimensions. +#' Properties are compared by ranks, for example when comparing the means the +#' values are ordered and the differences between the reference and another +#' dataset plotted. A series of Q-Q plots are also returned. +#' +#' The returned list has five items: +#' +#' \describe{ +#' \item{\code{Reference}}{The SCESet used as the reference.} +#' \item{\code{FeatureData}}{Combined feature data from the provided +#' SCESets.} +#' \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.} +#' \item{\code{Plots}}{Difference plots +#' \describe{ +#' \item{\code{Means}}{Boxplot of mean differences.} +#' \item{\code{Variances}}{Boxplot of variance differences.} +#' \item{\code{MeanVar}}{Scatter plot showing the difference from +#' the reference variance across expression ranks.} +#' \item{\code{LibraySizes}}{Boxplot of the library size +#' differences.} +#' \item{\code{ZerosGene}}{Boxplot of the differences in the +#' percentage of each gene that is zero.} +#' \item{\code{ZerosCell}}{Boxplot of the differences in the +#' percentage of each cell that is zero.} +#' \item{\code{MeanZeros}}{Scatter plot showing the difference from +#' the reference percentage of zeros across expression ranks.} +#' } +#' } +#' \item{\code{QQPlots}}{Quantile-Quantile plots +#' \describe{ +#' \item{\code{Means}}{Q-Q plot of the means.} +#' \item{\code{Variances}}{Q-Q plot of the variances.} +#' \item{\code{LibrarySizes}}{Q-Q plot of the library sizes.} +#' \item{\code{ZerosGene}}{Q-Q plot of the percentage of zeros per +#' gene.} +#' \item{\code{ZerosCell}}{Q-Q plot of the percentage of zeros per +#' cell.} +#' } +#' } +#' } +#' +#' The plots returned by this function are created using +#' \code{\link[ggplot2]{ggplot}} and are only a sample of the kind of plots you +#' might like to consider. The data used to create these plots is also returned +#' and should be in the correct format to allow you to create further plots +#' using \code{\link[ggplot2]{ggplot}}. +#' +#' @return List containing the combined datasets and plots. +#' @examples +#' sim1 <- splatSimulate(nGenes = 1000, groupCells = 20) +#' sim2 <- simpleSimulate(nGenes = 1000, nCells = 20) +#' difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple") +#' names(difference) +#' names(difference$Plots) +#' @importFrom ggplot2 ggplot aes_string geom_point geom_boxplot xlab ylab +#' ggtitle theme_minimal geom_hline geom_abline +#' @importFrom scater cpm<- +#' @export +diffSCESets <- function(sces, ref) { + + checkmate::assertList(sces, types = "SCESet", any.missing = FALSE, + min.len = 2, names = "unique") + checkmate::assertString(ref) + + if (!(ref %in% names(sces))) { + stop("'ref' must be the name of an SCESet in 'sces'") + } + + ref.dim <- dim(sces[[ref]]) + + for (name in names(sces)) { + sce <- sces[[name]] + if (!identical(dim(sce), ref.dim)) { + stop("SCESets must have the same dimensions") + } + fData(sce)$Dataset <- name + pData(sce)$Dataset <- name + sce <- scater::calculateQCMetrics(sce) + cpm(sce) <- edgeR::cpm(counts(sce)) + sce <- addFeatureStats(sce, "counts") + sce <- addFeatureStats(sce, "cpm", log = TRUE) + sces[[name]] <- sce + } + + ref.sce <- sces[[ref]] + + ref.means <- sort(fData(ref.sce)$MeanLogCPM) + ref.vars <- sort(fData(ref.sce)$VarLogCPM) + ref.libs <- sort(pData(ref.sce)$total_counts) + ref.z.gene <- sort(fData(ref.sce)$pct_dropout) + ref.z.cell <- sort(pData(ref.sce)$pct_dropout) + + ref.rank.ord <- order(fData(ref.sce)$exprs_rank) + ref.vars.rank <- fData(ref.sce)$VarLogCPM[ref.rank.ord] + ref.z.gene.rank <- fData(ref.sce)$pct_dropout[ref.rank.ord] + + for (name in names(sces)) { + sce <- sces[[name]] + fData(sce)$RefRankMeanLogCPM <- ref.means[rank(fData(sce)$MeanLogCPM)] + fData(sce)$RankDiffMeanLogCPM <- fData(sce)$MeanLogCPM - + fData(sce)$RefRankMeanLogCPM + fData(sce)$RefRankVarLogCPM <- ref.vars[rank(fData(sce)$VarLogCPM)] + fData(sce)$RankDiffVarLogCPM <- fData(sce)$VarLogCPM - + fData(sce)$RefRankVarLogCPM + pData(sce)$RefRankLibSize <- ref.libs[rank(pData(sce)$total_counts)] + pData(sce)$RankDiffLibSize <- pData(sce)$total_counts - + pData(sce)$RefRankLibSize + fData(sce)$RefRankZeros <- ref.z.gene[rank(fData(sce)$pct_dropout)] + fData(sce)$RankDiffZeros <- fData(sce)$pct_dropout - + fData(sce)$RefRankZeros + pData(sce)$RefRankZeros <- ref.z.cell[rank(pData(sce)$pct_dropout)] + pData(sce)$RankDiffZeros <- pData(sce)$pct_dropout - + pData(sce)$RefRankZeros + + fData(sce)$MeanRankVarDiff <- fData(sce)$VarLogCPM - + ref.vars.rank[fData(sce)$exprs_rank] + fData(sce)$MeanRankZerosDiff <- fData(sce)$pct_dropout - + ref.z.gene.rank[fData(sce)$exprs_rank] + + sces[[name]] <- sce + } + + ref.sce <- sces[[ref]] + sces[[ref]] <- NULL + + fData.all <- fData(sces[[1]]) + pData.all <- pData(sces[[1]]) + + if (length(sces) > 1) { + for (name in names(sces)[-1]) { + sce <- sces[[name]] + fData.all <- rbindMatched(fData.all, fData(sce)) + pData.all <- rbindMatched(pData.all, pData(sce)) + } + } + + fData.all$Dataset <- factor(fData.all$Dataset, levels = names(sces)) + pData.all$Dataset <- factor(pData.all$Dataset, levels = names(sces)) + + means <- ggplot(fData.all, + aes_string(x = "Dataset", y = "RankDiffMeanLogCPM", + colour = "Dataset")) + + geom_hline(yintercept = 0, colour = "red") + + geom_boxplot() + + ylab(expression(paste("Rank difference mean ", log[2], "(CPM + 1)"))) + + ggtitle("Difference in mean expression") + + theme_minimal() + + vars <- ggplot(fData.all, + aes_string(x = "Dataset", y = "RankDiffVarLogCPM", + colour = "Dataset")) + + geom_hline(yintercept = 0, colour = "red") + + geom_boxplot() + + ylab(expression(paste("Rank difference variance ", log[2], + "(CPM + 1)"))) + + ggtitle("Difference in variance") + + theme_minimal() + + mean.var <- ggplot(fData.all, + aes_string(x = "exprs_rank", y = "MeanRankVarDiff", + colour = "Dataset")) + + geom_hline(yintercept = 0, colour = "red") + + geom_point() + + xlab("Expression rank") + + ylab(expression(paste("Difference in variance ", log[2], + "(CPM + 1)"))) + + ggtitle("Difference in mean-variance relationship") + + theme_minimal() + + libs <- ggplot(pData.all, + aes_string(x = "Dataset", y = "RankDiffLibSize", + colour = "Dataset")) + + geom_hline(yintercept = 0, colour = "red") + + geom_boxplot() + + ylab(paste("Rank difference libray size")) + + ggtitle("Difference in library sizes") + + theme_minimal() + + z.gene <- ggplot(fData.all, + aes_string(x = "Dataset", y = "RankDiffZeros", + colour = "Dataset")) + + geom_hline(yintercept = 0, colour = "red") + + geom_boxplot() + + ylab(paste("Rank difference percentage zeros")) + + ggtitle("Difference in zeros per gene") + + theme_minimal() + + z.cell <- ggplot(pData.all, + aes_string(x = "Dataset", y = "RankDiffZeros", + colour = "Dataset")) + + geom_hline(yintercept = 0, colour = "red") + + geom_boxplot() + + ylab(paste("Rank difference percentage zeros")) + + ggtitle("Difference in zeros per cell") + + theme_minimal() + + mean.zeros <- ggplot(fData.all, + aes_string(x = "exprs_rank", y = "MeanRankZerosDiff", + colour = "Dataset")) + + geom_hline(yintercept = 0, colour = "red") + + geom_point() + + xlab("Expression rank") + + ylab("Difference in percentage zeros per gene") + + ggtitle("Difference in mean-zeros relationship") + + theme_minimal() + + means.qq <- ggplot(fData.all, + aes_string(x = "RefRankMeanLogCPM", y = "MeanLogCPM", + colour = "Dataset")) + + geom_abline(intercept = 0, slope = 1, colour = "red") + + geom_point() + + xlab(expression(paste("Reference mean ", log[2], "(CPM + 1)"))) + + ylab(expression(paste("Alternative mean ", log[2], "(CPM + 1)"))) + + ggtitle("Ranked means") + + theme_minimal() + + vars.qq <- ggplot(fData.all, + aes_string(x = "RefRankVarLogCPM", y = "VarLogCPM", + colour = "Dataset")) + + geom_abline(intercept = 0, slope = 1, colour = "red") + + geom_point() + + xlab(expression(paste("Reference variance ", log[2], "(CPM + 1)"))) + + ylab(expression(paste("Alternative variance ", log[2], "(CPM + 1)"))) + + ggtitle("Ranked variances") + + theme_minimal() + + libs.qq <- ggplot(pData.all, + aes_string(x = "RefRankLibSize", y = "total_counts", + colour = "Dataset")) + + geom_abline(intercept = 0, slope = 1, colour = "red") + + geom_point() + + xlab("Reference library size") + + ylab("Alternative library size") + + ggtitle("Ranked library sizes") + + theme_minimal() + + z.gene.qq <- ggplot(fData.all, + aes_string(x = "RefRankZeros", y = "pct_dropout", + colour = "Dataset")) + + geom_abline(intercept = 0, slope = 1, colour = "red") + + geom_point() + + xlab("Reference percentage zeros") + + ylab("Alternative percentage zeros") + + ggtitle("Ranked percentage zeros per gene") + + theme_minimal() + + z.cell.qq <- ggplot(pData.all, + aes_string(x = "RefRankZeros", y = "pct_dropout", + colour = "Dataset")) + + geom_abline(intercept = 0, slope = 1, colour = "red") + + geom_point() + + xlab("Reference percentage zeros") + + ylab("Alternative percentage zeros") + + ggtitle("Ranked percentage zeros per cell") + + theme_minimal() + + comparison <- list(Reference = ref.sce, + FeatureData = fData.all, + PhenoData = pData.all, + Plots = list(Means = means, + Variances = vars, + MeanVar = mean.var, + LibrarySizes = libs, + ZerosGene = z.gene, + ZerosCell = z.cell, + MeanZeros = mean.zeros), + QQPlots = list(Means = means.qq, + Variances = vars.qq, + LibrarySizes = libs.qq, + ZerosGene = z.gene.qq, + ZerosCell = z.cell.qq)) + + return(comparison) +} diff --git a/R/lun2-estimate.R b/R/lun2-estimate.R index cde774baf9b402bb2baa54e07a38caaa35718e82..6a5f53009fe549522faabb49f478b03430fc0849 100644 --- a/R/lun2-estimate.R +++ b/R/lun2-estimate.R @@ -185,7 +185,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop)) - params <- setParams(params, nGenes = length(logmeans), + params <- setParams(params, nGenes = nrow(counts), cell.plates = plates, plate.var = sigma2, gene.means = exp(logmeans), gene.disps = dge$tagwise.dispersion, diff --git a/R/splat-simulate.R b/R/splat-simulate.R index 70a987d72786c51cfc31f321805de3da84bb5e76..1feb4cf6d31423aa940820fee2dfe9bb39059b2c 100644 --- a/R/splat-simulate.R +++ b/R/splat-simulate.R @@ -192,7 +192,7 @@ splatSimulate <- function(params = newSplatParams(), sim <- splatSimBCVMeans(sim, params) if (verbose) {message("Simulating counts..")} sim <- splatSimTrueCounts(sim, params) - if (verbose) {message("Simulating dropout...")} + if (verbose) {message("Simulating dropout (if needed)...")} sim <- splatSimDropout(sim, params) if (verbose) {message("Creating final SCESet...")} @@ -537,8 +537,13 @@ splatSimBCVMeans <- function(sim, params) { bcv.df <- getParam(params, "bcv.df") base.means.cell <- get_exprs(sim, "BaseCellMeans") - bcv <- (bcv.common + (1 / sqrt(base.means.cell))) * - sqrt(bcv.df / rchisq(nGenes, df = bcv.df)) + if (is.finite(bcv.df)) { + bcv <- (bcv.common + (1 / sqrt(base.means.cell))) * + sqrt(bcv.df / rchisq(nGenes, df = bcv.df)) + } else { + warning("'bcv.df' is infinite. This parameter will be ignored.") + bcv <- (bcv.common + (1 / sqrt(base.means.cell))) + } means.cell <- matrix(rgamma(nGenes * nCells, shape = 1 / (bcv ^ 2), scale = base.means.cell * (bcv ^ 2)), diff --git a/man/addFeatureStats.Rd b/man/addFeatureStats.Rd index 8d454be2663c89d8ccada477845e84b212f36550..f4f72ef4cd05c914a0cac48746a6118102d5f1ed 100644 --- a/man/addFeatureStats.Rd +++ b/man/addFeatureStats.Rd @@ -30,6 +30,8 @@ Add additional feature statistics to an SCESet object \details{ Currently adds the following statistics: mean, variance, coefficient of variation, median and median absolute deviation. Statistics are added to -the \code{fData} slot and are named \code{stat_[log]_value_[no0]} where -\code{log} and \code{no0} are added if those arguments are true. +the \code{fData} slot and are named \code{Stat[Log]Value[No0]} where +\code{Log} and \code{No0} are added if those arguments are true. +UpperCamelCase is used to differentiate these columns from those added by +\code{scater}. } diff --git a/man/compareSCESets.Rd b/man/compareSCESets.Rd index 14eb59dca842ebf7e10f78a612da9155b6b73a8c..e0533d3114487674ac807388762d2e1f1c41cb28 100644 --- a/man/compareSCESets.Rd +++ b/man/compareSCESets.Rd @@ -14,10 +14,10 @@ List containing the combined datasets and plots. } \description{ Combine the data from several SCESet objects and produce some basic plots -comparing. +comparing them. } \details{ -The return list has three items: +The returned list has three items: \describe{ \item{\code{FeatureData}}{Combined feature data from the provided @@ -25,8 +25,8 @@ The return list has three items: \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.} \item{\code{Plots}}{Comparison plots \describe{ - \item{\code{Means}}{Violin plot of mean distribution.} - \item{\code{Variances}}{Violin plot of variance distribution.} + \item{\code{Means}}{Boxplot of mean distribution.} + \item{\code{Variances}}{Boxplot of variance distribution.} \item{\code{MeanVar}}{Scatter plot with fitted lines showing the mean-variance relationship.} \item{\code{LibraySizes}}{Boxplot of the library size diff --git a/man/diffSCESets.Rd b/man/diffSCESets.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3fc3e60bcb04fb6ca0ed1de07fe9d8439fc48e20 --- /dev/null +++ b/man/diffSCESets.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare.R +\name{diffSCESets} +\alias{diffSCESets} +\title{Diff SCESet objects} +\usage{ +diffSCESets(sces, ref) +} +\arguments{ +\item{sces}{named list of SCESet objects to combine and compare.} + +\item{ref}{string giving the name of the SCESet to use as the reference} +} +\value{ +List containing the combined datasets and plots. +} +\description{ +Combine the data from several SCESet objects and produce some basic plots +comparing them to a reference. +} +\details{ +This function aims to look at the differences between a reference SCESet and +one or more others. It requires each SCESet to have the same dimensions. +Properties are compared by ranks, for example when comparing the means the +values are ordered and the differences between the reference and another +dataset plotted. A series of Q-Q plots are also returned. + +The returned list has five items: + +\describe{ + \item{\code{Reference}}{The SCESet used as the reference.} + \item{\code{FeatureData}}{Combined feature data from the provided + SCESets.} + \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.} + \item{\code{Plots}}{Difference plots + \describe{ + \item{\code{Means}}{Boxplot of mean differences.} + \item{\code{Variances}}{Boxplot of variance differences.} + \item{\code{MeanVar}}{Scatter plot showing the difference from + the reference variance across expression ranks.} + \item{\code{LibraySizes}}{Boxplot of the library size + differences.} + \item{\code{ZerosGene}}{Boxplot of the differences in the + percentage of each gene that is zero.} + \item{\code{ZerosCell}}{Boxplot of the differences in the + percentage of each cell that is zero.} + \item{\code{MeanZeros}}{Scatter plot showing the difference from + the reference percentage of zeros across expression ranks.} + } + } + \item{\code{QQPlots}}{Quantile-Quantile plots + \describe{ + \item{\code{Means}}{Q-Q plot of the means.} + \item{\code{Variances}}{Q-Q plot of the variances.} + \item{\code{LibrarySizes}}{Q-Q plot of the library sizes.} + \item{\code{ZerosGene}}{Q-Q plot of the percentage of zeros per + gene.} + \item{\code{ZerosCell}}{Q-Q plot of the percentage of zeros per + cell.} + } + } +} + +The plots returned by this function are created using +\code{\link[ggplot2]{ggplot}} and are only a sample of the kind of plots you +might like to consider. The data used to create these plots is also returned +and should be in the correct format to allow you to create further plots +using \code{\link[ggplot2]{ggplot}}. +} +\examples{ +sim1 <- splatSimulate(nGenes = 1000, groupCells = 20) +sim2 <- simpleSimulate(nGenes = 1000, nCells = 20) +difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple") +names(difference) +names(difference$Plots) +} diff --git a/tests/testthat/test-splat-simulate.R b/tests/testthat/test-splat-simulate.R index d5e329a4f2da69dce7749e2611acf0e31acf697e..ad563b74670749f1ae0f5f8642ac2b6fc2a4119a 100644 --- a/tests/testthat/test-splat-simulate.R +++ b/tests/testthat/test-splat-simulate.R @@ -14,4 +14,9 @@ test_that("one group switches to single mode", { "nGroups is 1, switching to single mode") expect_silent(splatSimulate(test.params, method = "paths", groupCells = c(10), verbose = FALSE)) -}) \ No newline at end of file +}) + +test_that("infinite bcv.df is detected", { + expect_warning(splatSimulate(test.params, bcv.df = Inf), + "'bcv.df' is infinite. This parameter will be ignored.") +}) diff --git a/vignettes/splatter.Rmd b/vignettes/splatter.Rmd index 03eb98f35c1ec9b7494f15f0be590b1a49f53977..376ba8c104ecb87ddb4e36679b9b77b7e0f573b1 100644 --- a/vignettes/splatter.Rmd +++ b/vignettes/splatter.Rmd @@ -452,6 +452,27 @@ ggplot(comparison$PhenoData, geom_point() ``` +## Comparing differences + +Sometimes instead of visually comparing datasets it may be more interesting to +look at the differences between them. We can do this using the `diffSCESets` +function. Similar to `compareSCESets` this function takes a list of `SCESet` +objects but now we also specify one to be a reference. A series of similar plots +are returned but instead of showing the overall distributions they demonstrate +differences from the reference. + +```{r difference} +difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple") +difference$Plots$Means +``` + +We also get a series of Quantile-Quantile plot that can be used to compare +distributions. + +```{r difference-qq} +difference$QQPlots$Means +``` + # Session information {-} ```{r sessionInfo}