From 285cd00ae18e21d437544f36bcd9ff38b6115b05 Mon Sep 17 00:00:00 2001
From: Luke Zappia <luke.zappia@mcri.edu.au>
Date: Wed, 22 Mar 2017 06:44:08 +0000
Subject: [PATCH] Merge branch 'master' into devel

* master:
  Version 0.99.12
  Run checks
  Warn if bcv.df is infinite
  Update addFeatureStats col names
  Update progress message
  Switch to boxplots in compareSCESets
  Change Lun2 nGenes estimate
  Add diffSCESets to vignette
  Add diffSCESets function

From: Luke Zappia <lazappi@users.noreply.github.com>

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/splatter@127605 bc3139a8-67e5-0310-9ffc-ced21a209358
---
 DESCRIPTION                          |   6 +-
 NAMESPACE                            |   3 +
 NEWS.md                              |   8 +
 R/SCESet-functions.R                 |  26 ++-
 R/compare.R                          | 308 ++++++++++++++++++++++++++-
 R/lun2-estimate.R                    |   2 +-
 R/splat-simulate.R                   |  11 +-
 man/addFeatureStats.Rd               |   6 +-
 man/compareSCESets.Rd                |   8 +-
 man/diffSCESets.Rd                   |  76 +++++++
 tests/testthat/test-splat-simulate.R |   7 +-
 vignettes/splatter.Rmd               |  21 ++
 12 files changed, 447 insertions(+), 35 deletions(-)
 create mode 100644 man/diffSCESets.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index e6aee3e..5b99b77 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 0.99.11
-Date: 2017-03-20
+Version: 0.99.12
+Date: 2017-03-22
 Author: Luke Zappia
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
@@ -51,5 +51,5 @@ biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing,
     Software
 URL: https://github.com/Oshlack/splatter
 BugReports: https://github.com/Oshlack/splatter/issues
-RoxygenNote: 6.0.0
+RoxygenNote: 6.0.1
 VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 4e1ed1b..c00aebb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,7 @@ S3method(splatEstimate,SCESet)
 S3method(splatEstimate,matrix)
 export(addGeneLengths)
 export(compareSCESets)
+export(diffSCESets)
 export(getParam)
 export(getParams)
 export(listSims)
@@ -57,7 +58,9 @@ importFrom(checkmate,checkLogical)
 importFrom(checkmate,checkNumber)
 importFrom(checkmate,checkNumeric)
 importFrom(ggplot2,aes_string)
+importFrom(ggplot2,geom_abline)
 importFrom(ggplot2,geom_boxplot)
+importFrom(ggplot2,geom_hline)
 importFrom(ggplot2,geom_point)
 importFrom(ggplot2,geom_smooth)
 importFrom(ggplot2,geom_violin)
diff --git a/NEWS.md b/NEWS.md
index 4877c31..308481d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+# 0.99.12
+
+* Add diffSCESets function
+* Update compareSCESets plots
+* Modify Lun2 nGenes estimate
+* Modify how addFeatureStats names columns
+* Add infinte bcv.df warning to splatSimulate
+
 # 0.99.11
 
 * Add parallel option to lun2Estimate
diff --git a/R/SCESet-functions.R b/R/SCESet-functions.R
index 6317825..2a92c35 100644
--- a/R/SCESet-functions.R
+++ b/R/SCESet-functions.R
@@ -14,8 +14,10 @@
 #' @details
 #' Currently adds the following statistics: mean, variance, coefficient of
 #' variation, median and median absolute deviation. Statistics are added to
-#' the \code{fData} slot and are named \code{stat_[log]_value_[no0]} where
-#' \code{log} and \code{no0} are added if those arguments are true.
+#' the \code{fData} slot and are named \code{Stat[Log]Value[No0]} where
+#' \code{Log} and \code{No0} are added if those arguments are true.
+#' UpperCamelCase is used to differentiate these columns from those added by
+#' \code{scater}.
 #'
 #' @return SCESet with additional feature statistics
 #'
@@ -28,35 +30,37 @@ addFeatureStats <- function(sce, value = c("counts", "cpm", "tpm", "fpkm"),
     switch(value,
            counts = {
                values = scater::counts(sce)
+               suffix <- "Counts"
            },
            cpm = {
                values = scater::cpm(sce)
+               suffix <- "CPM"
            },
            tpm = {
                values = scater::tpm(sce)
+               suffix <- "TPM"
            },
            fpkm = {
                values = scater::fpkm(sce)
+               suffix <- "FPKM"
            }
     )
 
-    suffix <- value
-
     if (no.zeros) {
         values[values == 0] <- NA
-        suffix = paste0(suffix, "_no0")
+        suffix = paste0(suffix, "No0")
     }
 
     if (log) {
         values = log2(values + offset)
-        suffix = paste0("log_", suffix)
+        suffix = paste0("Log", suffix)
     }
 
-    mean.str <- paste0("mean_", suffix)
-    var.str  <- paste0("var_",  suffix)
-    cv.str   <- paste0("cv_",   suffix)
-    med.str  <- paste0("med_",  suffix)
-    mad.str  <- paste0("mad_",  suffix)
+    mean.str <- paste0("Mean", suffix)
+    var.str  <- paste0("Var",  suffix)
+    cv.str   <- paste0("CV",   suffix)
+    med.str  <- paste0("Med",  suffix)
+    mad.str  <- paste0("MAD",  suffix)
 
     fData(sce)[, mean.str] <- rowMeans(values, na.rm = TRUE)
     fData(sce)[, var.str]  <- matrixStats::rowVars(values, na.rm = TRUE)
diff --git a/R/compare.R b/R/compare.R
index 76de2ab..23aa271 100644
--- a/R/compare.R
+++ b/R/compare.R
@@ -1,12 +1,12 @@
 #' Compare SCESet objects
 #'
 #' Combine the data from several SCESet objects and produce some basic plots
-#' comparing.
+#' comparing them.
 #'
 #' @param sces named list of SCESet objects to combine and compare.
 #'
 #' @details
-#' The return list has three items:
+#' The returned list has three items:
 #'
 #' \describe{
 #'     \item{\code{FeatureData}}{Combined feature data from the provided
@@ -14,8 +14,8 @@
 #'     \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.}
 #'     \item{\code{Plots}}{Comparison plots
 #'         \describe{
-#'             \item{\code{Means}}{Violin plot of mean distribution.}
-#'             \item{\code{Variances}}{Violin plot of variance distribution.}
+#'             \item{\code{Means}}{Boxplot of mean distribution.}
+#'             \item{\code{Variances}}{Boxplot of variance distribution.}
 #'             \item{\code{MeanVar}}{Scatter plot with fitted lines showing the
 #'             mean-variance relationship.}
 #'             \item{\code{LibraySizes}}{Boxplot of the library size
@@ -80,24 +80,26 @@ compareSCESets <- function(sces) {
     pData.all$Dataset <- factor(pData.all$Dataset, levels = names(sces))
 
     means <- ggplot(fData.all,
-                    aes_string(x = "Dataset", y = "mean_log_cpm",
+                    aes_string(x = "Dataset", y = "MeanLogCPM",
                                colour = "Dataset")) +
-        geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
+        #geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
+        geom_boxplot() +
         ylab(expression(paste("Mean ", log[2], "(CPM + 1)"))) +
         ggtitle("Distribution of mean expression") +
         theme_minimal()
 
     vars <- ggplot(fData.all,
-                   aes_string(x = "Dataset", y = "var_cpm",
+                   aes_string(x = "Dataset", y = "VarCPM",
                               colour = "Dataset")) +
-        geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
+        #geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
+        geom_boxplot() +
         scale_y_log10(labels = scales::comma) +
         ylab("CPM Variance") +
         ggtitle("Distribution of variance") +
         theme_minimal()
 
     mean.var <- ggplot(fData.all,
-                       aes_string(x = "mean_log_cpm", y = "var_log_cpm",
+                       aes_string(x = "MeanLogCPM", y = "VarLogCPM",
                                   colour = "Dataset", fill = "Dataset")) +
         geom_point(size = 0.1, alpha = 0.1) +
         geom_smooth() +
@@ -134,7 +136,7 @@ compareSCESets <- function(sces) {
         theme_minimal()
 
     mean.zeros <- ggplot(fData.all,
-                         aes_string(x = "mean_counts", y = "pct_dropout",
+                         aes_string(x = "MeanCounts", y = "pct_dropout",
                                     colour = "Dataset", fill = "Dataset")) +
         geom_point(size = 0.1, alpha = 0.1) +
         geom_smooth() +
@@ -156,3 +158,289 @@ compareSCESets <- function(sces) {
 
     return(comparison)
 }
+
+#' Diff SCESet objects
+#'
+#' Combine the data from several SCESet objects and produce some basic plots
+#' comparing them to a reference.
+#'
+#' @param sces named list of SCESet objects to combine and compare.
+#' @param ref string giving the name of the SCESet to use as the reference
+#'
+#' @details
+#'
+#' This function aims to look at the differences between a reference SCESet and
+#' one or more others. It requires each SCESet to have the same dimensions.
+#' Properties are compared by ranks, for example when comparing the means the
+#' values are ordered and the differences between the reference and another
+#' dataset plotted. A series of Q-Q plots are also returned.
+#'
+#' The returned list has five items:
+#'
+#' \describe{
+#'     \item{\code{Reference}}{The SCESet used as the reference.}
+#'     \item{\code{FeatureData}}{Combined feature data from the provided
+#'     SCESets.}
+#'     \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.}
+#'     \item{\code{Plots}}{Difference plots
+#'         \describe{
+#'             \item{\code{Means}}{Boxplot of mean differences.}
+#'             \item{\code{Variances}}{Boxplot of variance differences.}
+#'             \item{\code{MeanVar}}{Scatter plot showing the difference from
+#'             the reference variance across expression ranks.}
+#'             \item{\code{LibraySizes}}{Boxplot of the library size
+#'             differences.}
+#'             \item{\code{ZerosGene}}{Boxplot of the differences in the
+#'             percentage of each gene that is zero.}
+#'             \item{\code{ZerosCell}}{Boxplot of the differences in the
+#'             percentage of each cell that is zero.}
+#'             \item{\code{MeanZeros}}{Scatter plot showing the difference from
+#'             the reference percentage of zeros across expression ranks.}
+#'     }
+#'   }
+#'   \item{\code{QQPlots}}{Quantile-Quantile plots
+#'       \describe{
+#'           \item{\code{Means}}{Q-Q plot of the means.}
+#'           \item{\code{Variances}}{Q-Q plot of the variances.}
+#'           \item{\code{LibrarySizes}}{Q-Q plot of the library sizes.}
+#'           \item{\code{ZerosGene}}{Q-Q plot of the percentage of zeros per
+#'           gene.}
+#'           \item{\code{ZerosCell}}{Q-Q plot of the percentage of zeros per
+#'           cell.}
+#'       }
+#'   }
+#' }
+#'
+#' The plots returned by this function are created using
+#' \code{\link[ggplot2]{ggplot}} and are only a sample of the kind of plots you
+#' might like to consider. The data used to create these plots is also returned
+#' and should be in the correct format to allow you to create further plots
+#' using \code{\link[ggplot2]{ggplot}}.
+#'
+#' @return List containing the combined datasets and plots.
+#' @examples
+#' sim1 <- splatSimulate(nGenes = 1000, groupCells = 20)
+#' sim2 <- simpleSimulate(nGenes = 1000, nCells = 20)
+#' difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple")
+#' names(difference)
+#' names(difference$Plots)
+#' @importFrom ggplot2 ggplot aes_string geom_point geom_boxplot xlab ylab
+#' ggtitle theme_minimal geom_hline geom_abline
+#' @importFrom scater cpm<-
+#' @export
+diffSCESets <- function(sces, ref) {
+
+    checkmate::assertList(sces, types = "SCESet", any.missing = FALSE,
+                          min.len = 2, names = "unique")
+    checkmate::assertString(ref)
+
+    if (!(ref %in% names(sces))) {
+        stop("'ref' must be the name of an SCESet in 'sces'")
+    }
+
+    ref.dim <- dim(sces[[ref]])
+
+    for (name in names(sces)) {
+        sce <- sces[[name]]
+        if (!identical(dim(sce), ref.dim)) {
+            stop("SCESets must have the same dimensions")
+        }
+        fData(sce)$Dataset <- name
+        pData(sce)$Dataset <- name
+        sce <- scater::calculateQCMetrics(sce)
+        cpm(sce) <- edgeR::cpm(counts(sce))
+        sce <- addFeatureStats(sce, "counts")
+        sce <- addFeatureStats(sce, "cpm", log = TRUE)
+        sces[[name]] <- sce
+    }
+
+    ref.sce <- sces[[ref]]
+
+    ref.means <- sort(fData(ref.sce)$MeanLogCPM)
+    ref.vars <- sort(fData(ref.sce)$VarLogCPM)
+    ref.libs <- sort(pData(ref.sce)$total_counts)
+    ref.z.gene <- sort(fData(ref.sce)$pct_dropout)
+    ref.z.cell <- sort(pData(ref.sce)$pct_dropout)
+
+    ref.rank.ord <- order(fData(ref.sce)$exprs_rank)
+    ref.vars.rank <- fData(ref.sce)$VarLogCPM[ref.rank.ord]
+    ref.z.gene.rank <- fData(ref.sce)$pct_dropout[ref.rank.ord]
+
+    for (name in names(sces)) {
+        sce <- sces[[name]]
+        fData(sce)$RefRankMeanLogCPM <- ref.means[rank(fData(sce)$MeanLogCPM)]
+        fData(sce)$RankDiffMeanLogCPM <- fData(sce)$MeanLogCPM -
+            fData(sce)$RefRankMeanLogCPM
+        fData(sce)$RefRankVarLogCPM <- ref.vars[rank(fData(sce)$VarLogCPM)]
+        fData(sce)$RankDiffVarLogCPM <- fData(sce)$VarLogCPM -
+            fData(sce)$RefRankVarLogCPM
+        pData(sce)$RefRankLibSize <- ref.libs[rank(pData(sce)$total_counts)]
+        pData(sce)$RankDiffLibSize <- pData(sce)$total_counts -
+            pData(sce)$RefRankLibSize
+        fData(sce)$RefRankZeros <- ref.z.gene[rank(fData(sce)$pct_dropout)]
+        fData(sce)$RankDiffZeros <- fData(sce)$pct_dropout -
+            fData(sce)$RefRankZeros
+        pData(sce)$RefRankZeros <- ref.z.cell[rank(pData(sce)$pct_dropout)]
+        pData(sce)$RankDiffZeros <- pData(sce)$pct_dropout -
+            pData(sce)$RefRankZeros
+
+        fData(sce)$MeanRankVarDiff <- fData(sce)$VarLogCPM -
+            ref.vars.rank[fData(sce)$exprs_rank]
+        fData(sce)$MeanRankZerosDiff <- fData(sce)$pct_dropout -
+            ref.z.gene.rank[fData(sce)$exprs_rank]
+
+        sces[[name]] <- sce
+    }
+
+    ref.sce <- sces[[ref]]
+    sces[[ref]] <- NULL
+
+    fData.all <- fData(sces[[1]])
+    pData.all <- pData(sces[[1]])
+
+    if (length(sces) > 1) {
+        for (name in names(sces)[-1]) {
+            sce <- sces[[name]]
+            fData.all <- rbindMatched(fData.all, fData(sce))
+            pData.all <- rbindMatched(pData.all, pData(sce))
+        }
+    }
+
+    fData.all$Dataset <- factor(fData.all$Dataset, levels = names(sces))
+    pData.all$Dataset <- factor(pData.all$Dataset, levels = names(sces))
+
+    means <- ggplot(fData.all,
+                    aes_string(x = "Dataset", y = "RankDiffMeanLogCPM",
+                               colour = "Dataset")) +
+        geom_hline(yintercept = 0, colour = "red") +
+        geom_boxplot() +
+        ylab(expression(paste("Rank difference mean ", log[2], "(CPM + 1)"))) +
+        ggtitle("Difference in mean expression") +
+        theme_minimal()
+
+    vars <- ggplot(fData.all,
+                    aes_string(x = "Dataset", y = "RankDiffVarLogCPM",
+                               colour = "Dataset")) +
+        geom_hline(yintercept = 0, colour = "red") +
+        geom_boxplot() +
+        ylab(expression(paste("Rank difference variance ", log[2],
+                              "(CPM + 1)"))) +
+        ggtitle("Difference in variance") +
+        theme_minimal()
+
+    mean.var <- ggplot(fData.all,
+                       aes_string(x = "exprs_rank", y = "MeanRankVarDiff",
+                                  colour = "Dataset")) +
+        geom_hline(yintercept = 0, colour = "red") +
+        geom_point() +
+        xlab("Expression rank") +
+        ylab(expression(paste("Difference in variance ", log[2],
+                              "(CPM + 1)"))) +
+        ggtitle("Difference in mean-variance relationship") +
+        theme_minimal()
+
+    libs <- ggplot(pData.all,
+                   aes_string(x = "Dataset", y = "RankDiffLibSize",
+                              colour = "Dataset")) +
+        geom_hline(yintercept = 0, colour = "red") +
+        geom_boxplot() +
+        ylab(paste("Rank difference libray size")) +
+        ggtitle("Difference in library sizes") +
+        theme_minimal()
+
+    z.gene <- ggplot(fData.all,
+                     aes_string(x = "Dataset", y = "RankDiffZeros",
+                                colour = "Dataset")) +
+        geom_hline(yintercept = 0, colour = "red") +
+        geom_boxplot() +
+        ylab(paste("Rank difference percentage zeros")) +
+        ggtitle("Difference in zeros per gene") +
+        theme_minimal()
+
+    z.cell <- ggplot(pData.all,
+                     aes_string(x = "Dataset", y = "RankDiffZeros",
+                                colour = "Dataset")) +
+        geom_hline(yintercept = 0, colour = "red") +
+        geom_boxplot() +
+        ylab(paste("Rank difference percentage zeros")) +
+        ggtitle("Difference in zeros per cell") +
+        theme_minimal()
+
+    mean.zeros <- ggplot(fData.all,
+                       aes_string(x = "exprs_rank", y = "MeanRankZerosDiff",
+                                  colour = "Dataset")) +
+        geom_hline(yintercept = 0, colour = "red") +
+        geom_point() +
+        xlab("Expression rank") +
+        ylab("Difference in percentage zeros per gene") +
+        ggtitle("Difference in mean-zeros relationship") +
+        theme_minimal()
+
+    means.qq <- ggplot(fData.all,
+                       aes_string(x = "RefRankMeanLogCPM", y = "MeanLogCPM",
+                                  colour = "Dataset")) +
+        geom_abline(intercept = 0, slope = 1, colour = "red") +
+        geom_point() +
+        xlab(expression(paste("Reference mean ", log[2], "(CPM + 1)"))) +
+        ylab(expression(paste("Alternative mean ", log[2], "(CPM + 1)"))) +
+        ggtitle("Ranked means") +
+        theme_minimal()
+
+    vars.qq <- ggplot(fData.all,
+                      aes_string(x = "RefRankVarLogCPM", y = "VarLogCPM",
+                                 colour = "Dataset")) +
+        geom_abline(intercept = 0, slope = 1, colour = "red") +
+        geom_point() +
+        xlab(expression(paste("Reference variance ", log[2], "(CPM + 1)"))) +
+        ylab(expression(paste("Alternative variance ", log[2], "(CPM + 1)"))) +
+        ggtitle("Ranked variances") +
+        theme_minimal()
+
+    libs.qq <- ggplot(pData.all,
+                      aes_string(x = "RefRankLibSize", y = "total_counts",
+                                 colour = "Dataset")) +
+        geom_abline(intercept = 0, slope = 1, colour = "red") +
+        geom_point() +
+        xlab("Reference library size") +
+        ylab("Alternative library size") +
+        ggtitle("Ranked library sizes") +
+        theme_minimal()
+
+    z.gene.qq <- ggplot(fData.all,
+                        aes_string(x = "RefRankZeros", y = "pct_dropout",
+                                   colour = "Dataset")) +
+        geom_abline(intercept = 0, slope = 1, colour = "red") +
+        geom_point() +
+        xlab("Reference percentage zeros") +
+        ylab("Alternative percentage zeros") +
+        ggtitle("Ranked percentage zeros per gene") +
+        theme_minimal()
+
+    z.cell.qq <- ggplot(pData.all,
+                        aes_string(x = "RefRankZeros", y = "pct_dropout",
+                                   colour = "Dataset")) +
+        geom_abline(intercept = 0, slope = 1, colour = "red") +
+        geom_point() +
+        xlab("Reference percentage zeros") +
+        ylab("Alternative percentage zeros") +
+        ggtitle("Ranked percentage zeros per cell") +
+        theme_minimal()
+
+    comparison <- list(Reference = ref.sce,
+                       FeatureData = fData.all,
+                       PhenoData = pData.all,
+                       Plots = list(Means = means,
+                                    Variances = vars,
+                                    MeanVar = mean.var,
+                                    LibrarySizes = libs,
+                                    ZerosGene = z.gene,
+                                    ZerosCell = z.cell,
+                                    MeanZeros = mean.zeros),
+                       QQPlots = list(Means = means.qq,
+                                      Variances = vars.qq,
+                                      LibrarySizes = libs.qq,
+                                      ZerosGene = z.gene.qq,
+                                      ZerosCell = z.cell.qq))
+
+    return(comparison)
+}
diff --git a/R/lun2-estimate.R b/R/lun2-estimate.R
index cde774b..6a5f530 100644
--- a/R/lun2-estimate.R
+++ b/R/lun2-estimate.R
@@ -185,7 +185,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
 
     zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop))
 
-    params <- setParams(params, nGenes = length(logmeans),
+    params <- setParams(params, nGenes = nrow(counts),
                         cell.plates = plates, plate.var = sigma2,
                         gene.means = exp(logmeans),
                         gene.disps = dge$tagwise.dispersion,
diff --git a/R/splat-simulate.R b/R/splat-simulate.R
index 70a987d..1feb4cf 100644
--- a/R/splat-simulate.R
+++ b/R/splat-simulate.R
@@ -192,7 +192,7 @@ splatSimulate <- function(params = newSplatParams(),
     sim <- splatSimBCVMeans(sim, params)
     if (verbose) {message("Simulating counts..")}
     sim <- splatSimTrueCounts(sim, params)
-    if (verbose) {message("Simulating dropout...")}
+    if (verbose) {message("Simulating dropout (if needed)...")}
     sim <- splatSimDropout(sim, params)
 
     if (verbose) {message("Creating final SCESet...")}
@@ -537,8 +537,13 @@ splatSimBCVMeans <- function(sim, params) {
     bcv.df <- getParam(params, "bcv.df")
     base.means.cell <- get_exprs(sim, "BaseCellMeans")
 
-    bcv <- (bcv.common + (1 / sqrt(base.means.cell))) *
-        sqrt(bcv.df / rchisq(nGenes, df = bcv.df))
+    if (is.finite(bcv.df)) {
+        bcv <- (bcv.common + (1 / sqrt(base.means.cell))) *
+            sqrt(bcv.df / rchisq(nGenes, df = bcv.df))
+    } else {
+        warning("'bcv.df' is infinite. This parameter will be ignored.")
+        bcv <- (bcv.common + (1 / sqrt(base.means.cell)))
+    }
 
     means.cell <- matrix(rgamma(nGenes * nCells, shape = 1 / (bcv ^ 2),
                                 scale = base.means.cell * (bcv ^ 2)),
diff --git a/man/addFeatureStats.Rd b/man/addFeatureStats.Rd
index 8d454be..f4f72ef 100644
--- a/man/addFeatureStats.Rd
+++ b/man/addFeatureStats.Rd
@@ -30,6 +30,8 @@ Add additional feature statistics to an SCESet object
 \details{
 Currently adds the following statistics: mean, variance, coefficient of
 variation, median and median absolute deviation. Statistics are added to
-the \code{fData} slot and are named \code{stat_[log]_value_[no0]} where
-\code{log} and \code{no0} are added if those arguments are true.
+the \code{fData} slot and are named \code{Stat[Log]Value[No0]} where
+\code{Log} and \code{No0} are added if those arguments are true.
+UpperCamelCase is used to differentiate these columns from those added by
+\code{scater}.
 }
diff --git a/man/compareSCESets.Rd b/man/compareSCESets.Rd
index 14eb59d..e0533d3 100644
--- a/man/compareSCESets.Rd
+++ b/man/compareSCESets.Rd
@@ -14,10 +14,10 @@ List containing the combined datasets and plots.
 }
 \description{
 Combine the data from several SCESet objects and produce some basic plots
-comparing.
+comparing them.
 }
 \details{
-The return list has three items:
+The returned list has three items:
 
 \describe{
     \item{\code{FeatureData}}{Combined feature data from the provided
@@ -25,8 +25,8 @@ The return list has three items:
     \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.}
     \item{\code{Plots}}{Comparison plots
         \describe{
-            \item{\code{Means}}{Violin plot of mean distribution.}
-            \item{\code{Variances}}{Violin plot of variance distribution.}
+            \item{\code{Means}}{Boxplot of mean distribution.}
+            \item{\code{Variances}}{Boxplot of variance distribution.}
             \item{\code{MeanVar}}{Scatter plot with fitted lines showing the
             mean-variance relationship.}
             \item{\code{LibraySizes}}{Boxplot of the library size
diff --git a/man/diffSCESets.Rd b/man/diffSCESets.Rd
new file mode 100644
index 0000000..3fc3e60
--- /dev/null
+++ b/man/diffSCESets.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compare.R
+\name{diffSCESets}
+\alias{diffSCESets}
+\title{Diff SCESet objects}
+\usage{
+diffSCESets(sces, ref)
+}
+\arguments{
+\item{sces}{named list of SCESet objects to combine and compare.}
+
+\item{ref}{string giving the name of the SCESet to use as the reference}
+}
+\value{
+List containing the combined datasets and plots.
+}
+\description{
+Combine the data from several SCESet objects and produce some basic plots
+comparing them to a reference.
+}
+\details{
+This function aims to look at the differences between a reference SCESet and
+one or more others. It requires each SCESet to have the same dimensions.
+Properties are compared by ranks, for example when comparing the means the
+values are ordered and the differences between the reference and another
+dataset plotted. A series of Q-Q plots are also returned.
+
+The returned list has five items:
+
+\describe{
+    \item{\code{Reference}}{The SCESet used as the reference.}
+    \item{\code{FeatureData}}{Combined feature data from the provided
+    SCESets.}
+    \item{\code{PhenoData}}{Combined pheno data from the provided SCESets.}
+    \item{\code{Plots}}{Difference plots
+        \describe{
+            \item{\code{Means}}{Boxplot of mean differences.}
+            \item{\code{Variances}}{Boxplot of variance differences.}
+            \item{\code{MeanVar}}{Scatter plot showing the difference from
+            the reference variance across expression ranks.}
+            \item{\code{LibraySizes}}{Boxplot of the library size
+            differences.}
+            \item{\code{ZerosGene}}{Boxplot of the differences in the
+            percentage of each gene that is zero.}
+            \item{\code{ZerosCell}}{Boxplot of the differences in the
+            percentage of each cell that is zero.}
+            \item{\code{MeanZeros}}{Scatter plot showing the difference from
+            the reference percentage of zeros across expression ranks.}
+    }
+  }
+  \item{\code{QQPlots}}{Quantile-Quantile plots
+      \describe{
+          \item{\code{Means}}{Q-Q plot of the means.}
+          \item{\code{Variances}}{Q-Q plot of the variances.}
+          \item{\code{LibrarySizes}}{Q-Q plot of the library sizes.}
+          \item{\code{ZerosGene}}{Q-Q plot of the percentage of zeros per
+          gene.}
+          \item{\code{ZerosCell}}{Q-Q plot of the percentage of zeros per
+          cell.}
+      }
+  }
+}
+
+The plots returned by this function are created using
+\code{\link[ggplot2]{ggplot}} and are only a sample of the kind of plots you
+might like to consider. The data used to create these plots is also returned
+and should be in the correct format to allow you to create further plots
+using \code{\link[ggplot2]{ggplot}}.
+}
+\examples{
+sim1 <- splatSimulate(nGenes = 1000, groupCells = 20)
+sim2 <- simpleSimulate(nGenes = 1000, nCells = 20)
+difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple")
+names(difference)
+names(difference$Plots)
+}
diff --git a/tests/testthat/test-splat-simulate.R b/tests/testthat/test-splat-simulate.R
index d5e329a..ad563b7 100644
--- a/tests/testthat/test-splat-simulate.R
+++ b/tests/testthat/test-splat-simulate.R
@@ -14,4 +14,9 @@ test_that("one group switches to single mode", {
                    "nGroups is 1, switching to single mode")
     expect_silent(splatSimulate(test.params, method = "paths",
                                 groupCells = c(10), verbose = FALSE))
-})
\ No newline at end of file
+})
+
+test_that("infinite bcv.df is detected", {
+    expect_warning(splatSimulate(test.params, bcv.df = Inf),
+                   "'bcv.df' is infinite. This parameter will be ignored.")
+})
diff --git a/vignettes/splatter.Rmd b/vignettes/splatter.Rmd
index 03eb98f..376ba8c 100644
--- a/vignettes/splatter.Rmd
+++ b/vignettes/splatter.Rmd
@@ -452,6 +452,27 @@ ggplot(comparison$PhenoData,
     geom_point()
 ```
 
+## Comparing differences
+
+Sometimes instead of visually comparing datasets it may be more interesting to
+look at the differences between them. We can do this using the `diffSCESets`
+function. Similar to `compareSCESets` this function takes a list of `SCESet`
+objects but now we also specify one to be a reference. A series of similar plots
+are returned but instead of showing the overall distributions they demonstrate
+differences from the reference.
+
+```{r difference}
+difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple")
+difference$Plots$Means
+```
+
+We also get a series of Quantile-Quantile plot that can be used to compare
+distributions.
+
+```{r difference-qq}
+difference$QQPlots$Means
+```
+
 # Session information {-}
 
 ```{r sessionInfo}
-- 
GitLab