Add BCV adjustment to Splotch

New exponential correction method for bcv.common estimates

Add BCV adjustment to Splotch
540b62d0 · Luke Zappia · 7d1fcd34 · 540b62d0 · 540b62d0 · 540b62d0
Commit 540b62d0 authored 5 years ago by Luke Zappia
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 1.9.4.9010
+Version: 1.9.4.9011
-Date: 2019-08-22
+Date: 2019-08-29
 Author: Luke Zappia
 Authors@R:
    c(person("Luke", "Zappia", role = c("aut", "cre"),

--- a/NEWS.md
+++ b/NEWS.md
+### Version 1.9.4.9011 (2019-08-29)
+* Add BCV adjustment to Splotch simualation
+* Use new exponential correction for bcv.common
 ### Version 1.9.4.9010 (2019-08-22)
 * Add density sampling options for means and library sizes

--- a/R/AllClasses.R
+++ b/R/AllClasses.R
@@ -288,6 +288,14 @@ setClass("SplatParams",
 #'             \item{\code{[mean.values]}}{Vector of means for each gene.}
 #'         }
 #'     }
+#'     \item{\emph{Biological Coefficient of Variation parameters}}{
+#'         \describe{
+#'             \item{\code{bcv.common}}{Underlying common dispersion across all
+#'             genes.}
+#'             \item{\code{[bcv.df]}}{Degrees of Freedom for the BCV inverse
+#'             chi-squared distribution.}
+#'         }
+#'     }
 #'     \item{\emph{Network parameters}}{
 #'         \describe{
 #'             \item{\code{[network.graph]}}{Graph containing the gene network.}
@@ -343,6 +351,8 @@ setClass("SplotchParams",
                   mean.dens = "density",
                   mean.method = "character",
                   mean.values = "numeric",
+                   bcv.common = "numeric",
+                   bcv.df = "numeric",
                   network.graph = "ANY",
                   network.nRegs = "numeric",
                   network.regsSet = "logical",
@@ -363,6 +373,8 @@ setClass("SplotchParams",
                                                          shape = 0.6)),
                               mean.method = "fit",
                               mean.values = numeric(),
+                               bcv.common = 0.1,
+                               bcv.df = 60,
                               network.graph = NULL,
                               network.nRegs = 100,
                               network.regsSet = FALSE,

--- a/R/SplotchParams-methods.R
+++ b/R/SplotchParams-methods.R
@@ -28,6 +28,8 @@ setValidity("SplotchParams", function(object) {
                mean.dens = checkmate::checkClass(v$mean.dens, "density"),
                mean.method = checkmate::checkChoice(v$mean.method,
                                                     c("fit", "density")),
+                bcv.common = checkNumber(v$bcv.common, lower = 0),
+                bcv.df = checkNumber(v$bcv.df, lower = 0),
                network.graph = checkmate::checkClass(v$network, "igraph",
                                                      null.ok = TRUE),
                network.nRegs = checkmate::checkInt(v$network.nRegs,
@@ -129,13 +131,15 @@ setMethod("show", "SplotchParams", function(object) {
                               "(Out Prob)"     = "mean.outProb",
                               "(Out Location)" = "mean.outLoc",
                               "(Out Scale)"    = "mean.outScale",
-                               "(Density)"      = "mean.density",
+                               "(Density)"      = "mean.dens",
                               "[Method]"       = "mean.method",
-                               "[Values]*"      = "mean.values"))
+                               "[Values]*"      = "mean.values"),
+                   "BCV:"  = c("(Common Disp)"  = "bcv.common",
+                               "[DoF]"          = "bcv.df"))
-    pp.network <- list("Network:" = c("[Graph]"   = "network.graph",
+    pp.network <- list("Network:" = c("[Graph]*"   = "network.graph",
-                                      "[nRegs]"   = "network.nRegs",
+                                      "[nRegs]"    = "network.nRegs",
-                                      "[regsSet]" = "network.regsSet"))
+                                      "[regsSet]*" = "network.regsSet"))
    pp.paths <- list("Paths:" = c("[nPrograms]" = "paths.nPrograms",
                                  "[Design]"  = "paths.design"))
@@ -160,15 +164,15 @@ setMethod("show", "SplotchParams", function(object) {
        showPP(object, pp.network)
    } else {
        cat(crayon::bold("Network:"), "\n")
-        cat(crayon::bold(crayon::blue("[GRAPH]\n")))
+        cat(crayon::bold(crayon::bgYellow(crayon::blue("[GRAPH]\n"))))
        cat(crayon::bold(crayon::green(paste(
            "Graph with", igraph::gorder(network.graph), "nodes and",
            igraph::gsize(network.graph), "edges\n"
        ))))
        show(network.graph)
        network.values <- list("[nRegs]" = getParam(object, "network.nRegs"),
-                               "[regsSet]" = getParam(object,
+                               "[regsSet]*" = getParam(object,
-                                                      "network.regsSet"))
+                                                       "network.regsSet"))
        network.default <- c(network.values$`[nRegs]` != 100,
                             network.values$`[regsSet]` != FALSE)
        showValues(network.values, network.default)

--- a/R/splotch-estimate.R
+++ b/R/splotch-estimate.R
@@ -53,6 +53,7 @@ splotchEstimate.matrix <- function(counts, params = newSplotchParams(),
    norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ]
    params <- splotchEstMean(norm.counts, params, verbose)
+    params <- splotchEstBCV(counts, params, verbose)
    params <- splotchEstLib(counts, params, verbose)
    params <- setParams(params, nGenes = nrow(counts), nCells = ncol(counts))
@@ -101,6 +102,66 @@ splotchEstMean <- function(norm.counts, params, verbose) {
    return(params)
 }
+splotchEstBCV <- function(counts, params, verbose) {
+    if (verbose) {message("Estimating BCV parameters...")}
+    # Add dummy design matrix to avoid print statement
+    design <- matrix(1, ncol(counts), 1)
+    disps <- edgeR::estimateDisp(counts, design = design)
+    raw <- disps$common.dispersion
+    mean.rate <- getParam(params, "mean.rate")
+    mean.shape <- getParam(params, "mean.shape")
+    # Caculate factors for correction based on mean parameters
+    # Coefficents come from fitting
+    #   RawEst ~
+    #       (A1 * mean.rate + A2 * mean.shape + A3 * mean.rate *
+    #           E_mean.shape + A4) *
+    #       ((B1 * mean.rate + B2 * mean.shape + B3 * mean.rate *
+    #           mean.shape + B4) ^ SimBCVCommon) +
+    #       (C1 * mean.rate + C2 * mean.shape + C3 * mean.rate *
+    #           mean.shape + C4)
+    # Using minpack.lm::nlsLM
+    A <- -0.6 * mean.rate - 2.9 * mean.shape +
+        0.4 * mean.rate * mean.shape + 9.5
+    B <- 0.15 * mean.rate + 0.25 * mean.shape -
+        0.1 * mean.rate * mean.shape + 1.2
+    C <- 0.9 * mean.rate + 4.5 * mean.shape -
+        0.6 * mean.rate * mean.shape - 10.6
+    Y <- (raw - C) / A
+    message("Raw: ", raw, " A: ", A, " B: ", B, " C: ", C, " Y: ", Y)
+    # Check if Y <= 0 to avoid problems when taking log
+    if (Y <= 0) {
+        Y <- 0.0001
+    }
+    corrected <- log(Y, base = B)
+    # Dispersion cannot be negative so apply a simpler correction to those.
+    # Coefficients come from fitting
+    #   SimBCVCommon ~ EstRaw
+    # Using lm (negative values only)
+    if (corrected < 0) {
+        warning("Exponential corrected BCV is negative.",
+                "Using linear correction.")
+        corrected <- -0.1 + 0.1 * raw
+    }
+    if (corrected < 0) {
+        warning("Linear corrected BCV is negative.",
+                "Using existing bcv.common.")
+        corrected <- getParam(params, "bcv.common")
+    }
+    params <- setParams(params, bcv.common = corrected)
+    return(params)
+}
 #' @importFrom stats density
 splotchEstLib <- function(counts, params, verbose) {

--- a/R/splotch-simulate.R
+++ b/R/splotch-simulate.R
@@ -321,6 +321,23 @@ splotchSimCellMeans <- function(sim, params, verbose) {
    colnames(cells.means) <- cell.names
    rownames(cells.means) <- gene.names
+    nGenes <- getParam(params, "nGenes")
+    bcv.common <- getParam(params, "bcv.common")
+    bcv.df <- getParam(params, "bcv.df")
+    if (is.finite(bcv.df)) {
+        bcv <- (bcv.common + (1 / sqrt(cells.means))) *
+            sqrt(bcv.df / rchisq(nGenes, df = bcv.df))
+    } else {
+        warning("'bcv.df' is infinite. This parameter will be ignored.")
+        bcv <- (bcv.common + (1 / sqrt(cells.means)))
+    }
+    cells.means <- matrix(rgamma(
+        as.numeric(nGenes) * as.numeric(nCells),
+        shape = 1 / (bcv ^ 2), scale = cells.means * (bcv ^ 2)),
+        nrow = nGenes, ncol = nCells)
    colData(sim)$Path <- cells.paths
    colData(sim)$Step <- cells.steps
    assays(sim)$CellMeans <- cells.means

--- a/man/SplotchParams.Rd
+++ b/man/SplotchParams.Rd
@@ -38,6 +38,14 @@ The Splotch simulation uses the following parameters:
            \item{\code{[mean.values]}}{Vector of means for each gene.}
        }
    }
+    \item{\emph{Biological Coefficient of Variation parameters}}{
+        \describe{
+            \item{\code{bcv.common}}{Underlying common dispersion across all
+            genes.}
+            \item{\code{[bcv.df]}}{Degrees of Freedom for the BCV inverse
+            chi-squared distribution.}
+        }
+    }
    \item{\emph{Network parameters}}{
        \describe{
            \item{\code{[network.graph]}}{Graph containing the gene network.}