From 69ba7ee1e9db5a9e8b0a0fc6ba84f2eb4a4a01be Mon Sep 17 00:00:00 2001
From: Luke Zappia <lazappi@users.noreply.github.com>
Date: Thu, 22 Aug 2019 15:08:15 +1000
Subject: [PATCH] Add density sampling option for means, lib sizes

---
 DESCRIPTION               |  4 +--
 NEWS.md                   |  4 +++
 R/AllClasses.R            | 18 +++++++++++-
 R/SplotchParams-methods.R | 20 ++++++++++----
 R/splotch-estimate.R      |  8 ++++--
 R/splotch-simulate.R      | 58 +++++++++++++++++++++++++--------------
 man/SplotchParams.Rd      | 11 +++++++-
 7 files changed, 92 insertions(+), 31 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b036793..1b17759 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 1.9.4.9009
-Date: 2019-08-21
+Version: 1.9.4.9010
+Date: 2019-08-22
 Author: Luke Zappia
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index 8ccfac0..99438d9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+### Version 1.9.4.9010 (2019-08-22)
+
+* Add density sampling options for means and library sizes
+
 ### Version 1.9.4.9009 (2019-08-21)
 
 * Replace library size log-normal with density and rejection sampling
diff --git a/R/AllClasses.R b/R/AllClasses.R
index 3af99fe..f14b964 100644
--- a/R/AllClasses.R
+++ b/R/AllClasses.R
@@ -279,7 +279,13 @@ setClass("SplatParams",
 #'             the expression outlier factor log-normal distribution.}
 #'             \item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the
 #'             expression outlier factor log-normal distribution.}
-#'             \item{\code{mean.values}}{Vector of means for each gene.}
+#'             \item{\code{mean.dens}}{\code{\link{density}} object describing
+#'             the log gene mean density.}
+#'             \item{\code{[mean.method]}}{Method to use for simulating gene
+#'             means. Either "fit" to sample from a gamma distribution (with
+#'             expression outliers) or "density" to sample from the provided
+#'             density object.}
+#'             \item{\code{[mean.values]}}{Vector of means for each gene.}
 #'         }
 #'     }
 #'     \item{\emph{Network parameters}}{
@@ -306,6 +312,9 @@ setClass("SplatParams",
 #'             distribution is used.}
 #'             \item{\code{lib.dens}}{\code{\link{density}} object describing
 #'             the library size density.}
+#'             \item{\code{[lib.method]}}{Method to use for simulating library
+#'             sizes. Either "fit" to sample from a log-normal distribution or
+#'             "density" to sample from the provided density object.}
 #'         }
 #'     }
 #'     \item{\emph{Paths parameters}}{
@@ -331,6 +340,8 @@ setClass("SplotchParams",
                    mean.outProb = "numeric",
                    mean.outLoc = "numeric",
                    mean.outScale = "numeric",
+                   mean.dens = "density",
+                   mean.method = "character",
                    mean.values = "numeric",
                    network.graph = "ANY",
                    network.nRegs = "numeric",
@@ -341,12 +352,16 @@ setClass("SplotchParams",
                    lib.loc = "numeric",
                    lib.scale = "numeric",
                    lib.dens = "density",
+                   lib.method = "character",
                    cells.design = "data.frame"),
          prototype = prototype(mean.rate = 0.3,
                                mean.shape = 0.6,
                                mean.outProb = 0.05,
                                mean.outLoc = 4,
                                mean.outScale = 0.5,
+                               mean.dens = density(rgamma(10000, rate = 0.3,
+                                                          shape = 0.6)),
+                               mean.method = "fit",
                                mean.values = numeric(),
                                network.graph = NULL,
                                network.nRegs = 100,
@@ -361,6 +376,7 @@ setClass("SplotchParams",
                                lib.loc = 11,
                                lib.scale = 0.2,
                                lib.dens = density(rlnorm(10000, 11, 0.2)),
+                               lib.method = "fit",
                                cells.design = data.frame(
                                    Path = 1,
                                    Probability = 1,
diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R
index 6f4d13b..56a1b6c 100644
--- a/R/SplotchParams-methods.R
+++ b/R/SplotchParams-methods.R
@@ -25,6 +25,9 @@ setValidity("SplotchParams", function(object) {
                 mean.outProb = checkNumber(v$mean.outProb, lower = 0, upper = 1),
                 mean.outLoc = checkNumber(v$mean.outLoc),
                 mean.outScale = checkNumber(v$mean.outScale, lower = 0),
+                mean.dens = checkmate::checkClass(v$mean.dens, "density"),
+                mean.method = checkmate::checkChoice(v$mean.method,
+                                                     c("fit", "density")),
                 network.graph = checkmate::checkClass(v$network, "igraph",
                                                       null.ok = TRUE),
                 network.nRegs = checkmate::checkInt(v$network.nRegs,
@@ -40,6 +43,8 @@ setValidity("SplotchParams", function(object) {
                 lib.loc = checkmate::checkNumber(v$lib.loc),
                 lib.scale = checkmate::checkNumber(v$lib.scale, lower = 0),
                 lib.dens = checkmate::checkClass(v$lib.dens, "density"),
+                lib.method = checkmate::checkChoice(v$lib.method,
+                                                    c("fit", "density")),
                 cells.design = checkmate::checkDataFrame(v$cells.design,
                                                          types = "numeric",
                                                          any.missing = FALSE,
@@ -124,6 +129,8 @@ setMethod("show", "SplotchParams", function(object) {
                                "(Out Prob)"     = "mean.outProb",
                                "(Out Location)" = "mean.outLoc",
                                "(Out Scale)"    = "mean.outScale",
+                               "(Density)"      = "mean.density",
+                               "[Method]"       = "mean.method",
                                "[Values]*"      = "mean.values"))
 
     pp.network <- list("Network:" = c("[Graph]"   = "network.graph",
@@ -135,7 +142,8 @@ setMethod("show", "SplotchParams", function(object) {
 
     pp.bot <- list("Library size:" = c("(Location)" = "lib.loc",
                                        "(Scale)"    = "lib.scale",
-                                       "(Density)"  = "lib.dens"),
+                                       "(Density)"  = "lib.dens",
+                                       "[Method]"   = "lib.method"),
                    "Cells:"        = c("[Design]"   = "cells.design"))
 
     paths.means <- getParam(object, "paths.means")
@@ -205,10 +213,12 @@ setMethod("setParam", "SplotchParams", function(object, name, value) {
 
     if (name == "network.graph") {
         checkmate::assertClass(value, "igraph")
-        object <- setParamUnchecked(object, "nGenes", igraph::gorder(value))
-        if (!(length(getParam(object, "mean.values")) == 0)) {
-            warning("changing network.graph resets mean.values")
-            object <- setParam(object, "mean.values", numeric())
+        if (getParam(object, "nGenes") != igraph::gorder(value)) {
+            if (!(length(getParam(object, "mean.values")) == 0)) {
+                warning("changing network.graph resets mean.values")
+                object <- setParam(object, "mean.values", numeric())
+            }
+            object <- setParamUnchecked(object, "nGenes", igraph::gorder(value))
         }
 
         if ("IsReg" %in% igraph::vertex_attr_names(value)) {
diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R
index 523e07b..cae0f88 100644
--- a/R/splotch-estimate.R
+++ b/R/splotch-estimate.R
@@ -78,7 +78,7 @@ splotchEstMean <- function(norm.counts, params, verbose) {
     med <- median(lmeans)
     mad <- mad(lmeans)
 
-    bound <- med + 1 * mad
+    bound <- med + 2 * mad
 
     outs <- which(lmeans > bound)
 
@@ -88,13 +88,16 @@ splotchEstMean <- function(norm.counts, params, verbose) {
 
     if (length(outs) > 1) {
         facs <- means[outs] / median(means)
-        fit <- selectFit(facs, "lnorm", verbose = verbose)
+        #fit <- selectFit(facs, "lnorm", verbose = verbose)
+        fit <- fitdistrplus::fitdist(facs, "lnorm")
 
         params <- setParams(params,
                             mean.outLoc = unname(fit$estimate["meanlog"]),
                             mean.outScale = unname(fit$estimate["sdlog"]))
     }
 
+    params <- setParams(params, mean.dens = density(lmeans))
+
     return(params)
 }
 
@@ -191,6 +194,7 @@ selectFit <- function(data, distr, weights = NULL, verbose = TRUE) {
     }
 
     scores <- fitdistrplus::gofstat(fits)$cvm
+
     # Flatten in case scores is a list
     scores.flat <- unlist(scores)
     selected <- which(scores.flat == min(scores.flat, na.rm = TRUE))
diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R
index 1148c61..5632389 100644
--- a/R/splotch-simulate.R
+++ b/R/splotch-simulate.R
@@ -146,20 +146,30 @@ splotchSimGeneMeans <- function(params, verbose) {
 
     if (verbose) {message("Simulating means...")}
     nGenes <- getParam(params, "nGenes")
-    mean.shape <- getParam(params, "mean.shape")
-    mean.rate <- getParam(params, "mean.rate")
-    mean.outProb <- getParam(params, "mean.outProb")
-    mean.outLoc <- getParam(params, "mean.outLoc")
-    mean.outScale <- getParam(params, "mean.outScale")
-
-    mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate)
-
-    outlier.facs <- getLNormFactors(nGenes, mean.outProb, 0, mean.outLoc,
-                                    mean.outScale)
-    median.means.gene <- median(mean.values)
-    outlier.means <- median.means.gene * outlier.facs
-    is.outlier <- outlier.facs != 1
-    mean.values[is.outlier] <- outlier.means[is.outlier]
+    mean.method <- getParam(params, "mean.method")
+
+    if (mean.method == "fit") {
+        if (verbose) {message("Sampling from gamma distribution...")}
+        mean.shape <- getParam(params, "mean.shape")
+        mean.rate <- getParam(params, "mean.rate")
+        mean.outProb <- getParam(params, "mean.outProb")
+        mean.outLoc <- getParam(params, "mean.outLoc")
+        mean.outScale <- getParam(params, "mean.outScale")
+
+        mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate)
+
+        outlier.facs <- getLNormFactors(nGenes, mean.outProb, 0, mean.outLoc,
+                                        mean.outScale)
+        median.means.gene <- median(mean.values)
+        outlier.means <- median.means.gene * outlier.facs
+        is.outlier <- outlier.facs != 1
+        mean.values[is.outlier] <- outlier.means[is.outlier]
+    } else if (mean.method == "density") {
+        if (verbose) {message("Sampling from density object...")}
+        mean.dens <- getParam(params, "mean.dens")
+
+        mean.values <- exp(sampleDensity(nGenes, mean.dens, lower = -Inf))
+    }
 
     params <- setParam(params, "mean.values", mean.values)
 
@@ -242,12 +252,20 @@ splotchSimLibSizes <- function(sim, params, verbose) {
 
     if (verbose) {message("Simulating library sizes...")}
     nCells <- getParam(params, "nCells")
-    # lib.loc <- getParam(params, "lib.loc")
-    # lib.scale <- getParam(params, "lib.scale")
-    lib.dens <- getParam(params, "lib.dens")
+    lib.method <- getParam(params, "lib.method")
 
-    # exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale)
-    exp.lib.sizes <- rejectionSample(nCells, lib.dens)
+    if (lib.method == "fit") {
+        if (verbose) {message("Sampling from log-normal distribution...")}
+        lib.loc <- getParam(params, "lib.loc")
+        lib.scale <- getParam(params, "lib.scale")
+
+        exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale)
+    } else if (lib.method == "density") {
+        if (verbose) {message("Sampling from density object...")}
+        lib.dens <- getParam(params, "lib.dens")
+
+        exp.lib.sizes <- sampleDensity(nCells, lib.dens)
+    }
 
     colData(sim)$ExpLibSize <- exp.lib.sizes
 
@@ -352,7 +370,7 @@ getBetaStepProbs <- function(steps, alpha, beta) {
 }
 
 #' @importFrom stats approxfun
-rejectionSample <- function(n, dens, lower = 0) {
+sampleDensity <- function(n, dens, lower = 0) {
 
     xmin <- min(dens$x)
     xmax <- max(dens$x)
diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd
index 2c7861c..0b03061 100644
--- a/man/SplotchParams.Rd
+++ b/man/SplotchParams.Rd
@@ -29,7 +29,13 @@ The Splotch simulation uses the following parameters:
             the expression outlier factor log-normal distribution.}
             \item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the
             expression outlier factor log-normal distribution.}
-            \item{\code{mean.values}}{Vector of means for each gene.}
+            \item{\code{mean.dens}}{\code{\link{density}} object describing
+            the log gene mean density.}
+            \item{\code{[mean.method]}}{Method to use for simulating gene
+            means. Either "fit" to sample from a gamma distribution (with
+            expression outliers) or "density" to sample from the provided
+            density object.}
+            \item{\code{[mean.values]}}{Vector of means for each gene.}
         }
     }
     \item{\emph{Network parameters}}{
@@ -56,6 +62,9 @@ The Splotch simulation uses the following parameters:
             distribution is used.}
             \item{\code{lib.dens}}{\code{\link{density}} object describing
             the library size density.}
+            \item{\code{[lib.method]}}{Method to use for simulating library
+            sizes. Either "fit" to sample from a log-normal distribution or
+            "density" to sample from the provided density object.}
         }
     }
     \item{\emph{Paths parameters}}{
-- 
GitLab