From de1c10d9e6933b6e6fccc533e33980fcea5eb417 Mon Sep 17 00:00:00 2001
From: Luke Zappia <lazappi@users.noreply.github.com>
Date: Wed, 21 Aug 2019 17:27:59 +1000
Subject: [PATCH] Replace library size log-norm with density

Not sure if this this a good plan...
---
 DESCRIPTION               |  4 ++--
 NAMESPACE                 |  3 +++
 NEWS.md                   |  4 ++++
 R/AllClasses.R            |  4 ++++
 R/SplotchParams-methods.R | 10 ++++++----
 R/params-functions.R      | 15 ++++++++++++---
 R/splotch-estimate.R      |  6 +++++-
 R/splotch-simulate.R      | 36 +++++++++++++++++++++++++++++++++---
 man/SplotchParams.Rd      |  2 ++
 9 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 888d745..b036793 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 1.9.4.9008
-Date: 2019-08-20
+Version: 1.9.4.9009
+Date: 2019-08-21
 Author: Luke Zappia
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
diff --git a/NAMESPACE b/NAMESPACE
index a2a713a..08a786f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -114,6 +114,7 @@ importFrom(ggplot2,geom_hline)
 importFrom(ggplot2,geom_point)
 importFrom(ggplot2,geom_smooth)
 importFrom(ggplot2,geom_tile)
+importFrom(ggplot2,geom_violin)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,ggtitle)
 importFrom(ggplot2,scale_colour_manual)
@@ -136,8 +137,10 @@ importFrom(methods,slot)
 importFrom(methods,slotNames)
 importFrom(methods,validObject)
 importFrom(stats,aggregate)
+importFrom(stats,approxfun)
 importFrom(stats,cor)
 importFrom(stats,dbeta)
+importFrom(stats,density)
 importFrom(stats,dnbinom)
 importFrom(stats,ks.test)
 importFrom(stats,median)
diff --git a/NEWS.md b/NEWS.md
index 9143752..2aa1416 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+### Version 1.9.4.9009 (2019-08-21)
+
+* Replace library size log-normal with density and rejection sampling
+
 ### Version 1.9.4.9008 (2019-08-20)
 
 * Merge master into splotch branch
diff --git a/R/AllClasses.R b/R/AllClasses.R
index 4124ac3..3af99fe 100644
--- a/R/AllClasses.R
+++ b/R/AllClasses.R
@@ -304,6 +304,8 @@ setClass("SplatParams",
 #'             \item{\code{lib.scale}}{Scale (sdlog) parameter for the library
 #'             size log-normal distribution, or sd parameter if a normal
 #'             distribution is used.}
+#'             \item{\code{lib.dens}}{\code{\link{density}} object describing
+#'             the library size density.}
 #'         }
 #'     }
 #'     \item{\emph{Paths parameters}}{
@@ -338,6 +340,7 @@ setClass("SplotchParams",
                    paths.means = "list",
                    lib.loc = "numeric",
                    lib.scale = "numeric",
+                   lib.dens = "density",
                    cells.design = "data.frame"),
          prototype = prototype(mean.rate = 0.3,
                                mean.shape = 0.6,
@@ -357,6 +360,7 @@ setClass("SplotchParams",
                                paths.means = list(),
                                lib.loc = 11,
                                lib.scale = 0.2,
+                               lib.dens = density(rlnorm(10000, 11, 0.2)),
                                cells.design = data.frame(
                                    Path = 1,
                                    Probability = 1,
diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R
index 9b6e736..6f4d13b 100644
--- a/R/SplotchParams-methods.R
+++ b/R/SplotchParams-methods.R
@@ -37,8 +37,9 @@ setValidity("SplotchParams", function(object) {
                                                          any.missing = FALSE,
                                                          min.rows = 1,
                                                          ncols = 3),
-                lib.loc = checkNumber(v$lib.loc),
-                lib.scale = checkNumber(v$lib.scale, lower = 0),
+                lib.loc = checkmate::checkNumber(v$lib.loc),
+                lib.scale = checkmate::checkNumber(v$lib.scale, lower = 0),
+                lib.dens = checkmate::checkClass(v$lib.dens, "density"),
                 cells.design = checkmate::checkDataFrame(v$cells.design,
                                                          types = "numeric",
                                                          any.missing = FALSE,
@@ -133,8 +134,9 @@ setMethod("show", "SplotchParams", function(object) {
                                   "[Design]"  = "paths.design"))
 
     pp.bot <- list("Library size:" = c("(Location)" = "lib.loc",
-                                       "(Scale)"  = "lib.scale"),
-                   "Cells:"        = c("[Design]" = "cells.design"))
+                                       "(Scale)"    = "lib.scale",
+                                       "(Density)"  = "lib.dens"),
+                   "Cells:"        = c("[Design]"   = "cells.design"))
 
     paths.means <- getParam(object, "paths.means")
     if (length(paths.means) == 0) {
diff --git a/R/params-functions.R b/R/params-functions.R
index 5e7eed9..3a05a05 100644
--- a/R/params-functions.R
+++ b/R/params-functions.R
@@ -107,10 +107,19 @@ showValues <- function(values, not.default) {
                              len = length(values))
 
     short.values <- sapply(values, function(x) {
-        if (length(x) > 4) {
-            paste0(paste(head(x, n = 4), collapse = ", "), ",...")
+        if (is.list(x)) {
+            classes <- class(x)
+            if (length(classes) == 1 && classes == "list") {
+                paste("List with", length(x), "items")
+            } else {
+                paste("Object of class", paste(classes, collapse = ", "))
+            }
         } else {
-            paste(x, collapse = ", ")
+            if (length(x) > 4) {
+                paste0(paste(head(x, n = 4), collapse = ", "), ",...")
+            } else {
+                paste(x, collapse = ", ")
+            }
         }
     })
 
diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R
index d4bff44..523e07b 100644
--- a/R/splotch-estimate.R
+++ b/R/splotch-estimate.R
@@ -98,6 +98,7 @@ splotchEstMean <- function(norm.counts, params, verbose) {
     return(params)
 }
 
+#' @importFrom stats density
 splotchEstLib <- function(counts, params, verbose) {
 
     if (verbose) {message("Estimating library size parameters...")}
@@ -109,7 +110,10 @@ splotchEstLib <- function(counts, params, verbose) {
     lib.loc <- unname(fit$estimate["meanlog"])
     lib.scale <- unname(fit$estimate["sdlog"])
 
-    params <- setParams(params, lib.loc = lib.loc, lib.scale = lib.scale)
+    dens <- density(lib.sizes)
+
+    params <- setParams(params, lib.loc = lib.loc, lib.scale = lib.scale,
+                        lib.dens = dens)
 
     return(params)
 }
diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R
index 3fa1e82..1148c61 100644
--- a/R/splotch-simulate.R
+++ b/R/splotch-simulate.R
@@ -242,10 +242,13 @@ splotchSimLibSizes <- function(sim, params, verbose) {
 
     if (verbose) {message("Simulating library sizes...")}
     nCells <- getParam(params, "nCells")
-    lib.loc <- getParam(params, "lib.loc")
-    lib.scale <- getParam(params, "lib.scale")
+    # lib.loc <- getParam(params, "lib.loc")
+    # lib.scale <- getParam(params, "lib.scale")
+    lib.dens <- getParam(params, "lib.dens")
+
+    # exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale)
+    exp.lib.sizes <- rejectionSample(nCells, lib.dens)
 
-    exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale)
     colData(sim)$ExpLibSize <- exp.lib.sizes
 
     return(sim)
@@ -347,3 +350,30 @@ getBetaStepProbs <- function(steps, alpha, beta) {
 
     return(probs)
 }
+
+#' @importFrom stats approxfun
+rejectionSample <- function(n, dens, lower = 0) {
+
+    xmin <- min(dens$x)
+    xmax <- max(dens$x)
+    ymin <- min(dens$y)
+    ymax <- max(dens$y)
+
+    boundary <- approxfun(dens$x, dens$y)
+
+    values <- c()
+    nsel <- 0
+
+    while(nsel < n) {
+        x <- runif(1e4, xmin, xmax)
+        y <- runif(1e4, ymin, ymax)
+        sel <- y < boundary(x) & x > lower
+
+        nsel <- nsel + sum(sel)
+        values <- c(values, x[sel])
+    }
+
+    values <- values[1:n]
+
+    return(values)
+}
diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd
index 904b42a..2c7861c 100644
--- a/man/SplotchParams.Rd
+++ b/man/SplotchParams.Rd
@@ -54,6 +54,8 @@ The Splotch simulation uses the following parameters:
             \item{\code{lib.scale}}{Scale (sdlog) parameter for the library
             size log-normal distribution, or sd parameter if a normal
             distribution is used.}
+            \item{\code{lib.dens}}{\code{\link{density}} object describing
+            the library size density.}
         }
     }
     \item{\emph{Paths parameters}}{
-- 
GitLab