From de1c10d9e6933b6e6fccc533e33980fcea5eb417 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Wed, 21 Aug 2019 17:27:59 +1000 Subject: [PATCH] Replace library size log-norm with density Not sure if this this a good plan... --- DESCRIPTION | 4 ++-- NAMESPACE | 3 +++ NEWS.md | 4 ++++ R/AllClasses.R | 4 ++++ R/SplotchParams-methods.R | 10 ++++++---- R/params-functions.R | 15 ++++++++++++--- R/splotch-estimate.R | 6 +++++- R/splotch-simulate.R | 36 +++++++++++++++++++++++++++++++++--- man/SplotchParams.Rd | 2 ++ 9 files changed, 71 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 888d745..b036793 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.4.9008 -Date: 2019-08-20 +Version: 1.9.4.9009 +Date: 2019-08-21 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index a2a713a..08a786f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -114,6 +114,7 @@ importFrom(ggplot2,geom_hline) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_smooth) importFrom(ggplot2,geom_tile) +importFrom(ggplot2,geom_violin) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggtitle) importFrom(ggplot2,scale_colour_manual) @@ -136,8 +137,10 @@ importFrom(methods,slot) importFrom(methods,slotNames) importFrom(methods,validObject) importFrom(stats,aggregate) +importFrom(stats,approxfun) importFrom(stats,cor) importFrom(stats,dbeta) +importFrom(stats,density) importFrom(stats,dnbinom) importFrom(stats,ks.test) importFrom(stats,median) diff --git a/NEWS.md b/NEWS.md index 9143752..2aa1416 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +### Version 1.9.4.9009 (2019-08-21) + +* Replace library size log-normal with density and rejection sampling + ### Version 1.9.4.9008 (2019-08-20) * Merge master into splotch branch diff --git a/R/AllClasses.R b/R/AllClasses.R index 4124ac3..3af99fe 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -304,6 +304,8 @@ setClass("SplatParams", #' \item{\code{lib.scale}}{Scale (sdlog) parameter for the library #' size log-normal distribution, or sd parameter if a normal #' distribution is used.} +#' \item{\code{lib.dens}}{\code{\link{density}} object describing +#' the library size density.} #' } #' } #' \item{\emph{Paths parameters}}{ @@ -338,6 +340,7 @@ setClass("SplotchParams", paths.means = "list", lib.loc = "numeric", lib.scale = "numeric", + lib.dens = "density", cells.design = "data.frame"), prototype = prototype(mean.rate = 0.3, mean.shape = 0.6, @@ -357,6 +360,7 @@ setClass("SplotchParams", paths.means = list(), lib.loc = 11, lib.scale = 0.2, + lib.dens = density(rlnorm(10000, 11, 0.2)), cells.design = data.frame( Path = 1, Probability = 1, diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index 9b6e736..6f4d13b 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -37,8 +37,9 @@ setValidity("SplotchParams", function(object) { any.missing = FALSE, min.rows = 1, ncols = 3), - lib.loc = checkNumber(v$lib.loc), - lib.scale = checkNumber(v$lib.scale, lower = 0), + lib.loc = checkmate::checkNumber(v$lib.loc), + lib.scale = checkmate::checkNumber(v$lib.scale, lower = 0), + lib.dens = checkmate::checkClass(v$lib.dens, "density"), cells.design = checkmate::checkDataFrame(v$cells.design, types = "numeric", any.missing = FALSE, @@ -133,8 +134,9 @@ setMethod("show", "SplotchParams", function(object) { "[Design]" = "paths.design")) pp.bot <- list("Library size:" = c("(Location)" = "lib.loc", - "(Scale)" = "lib.scale"), - "Cells:" = c("[Design]" = "cells.design")) + "(Scale)" = "lib.scale", + "(Density)" = "lib.dens"), + "Cells:" = c("[Design]" = "cells.design")) paths.means <- getParam(object, "paths.means") if (length(paths.means) == 0) { diff --git a/R/params-functions.R b/R/params-functions.R index 5e7eed9..3a05a05 100644 --- a/R/params-functions.R +++ b/R/params-functions.R @@ -107,10 +107,19 @@ showValues <- function(values, not.default) { len = length(values)) short.values <- sapply(values, function(x) { - if (length(x) > 4) { - paste0(paste(head(x, n = 4), collapse = ", "), ",...") + if (is.list(x)) { + classes <- class(x) + if (length(classes) == 1 && classes == "list") { + paste("List with", length(x), "items") + } else { + paste("Object of class", paste(classes, collapse = ", ")) + } } else { - paste(x, collapse = ", ") + if (length(x) > 4) { + paste0(paste(head(x, n = 4), collapse = ", "), ",...") + } else { + paste(x, collapse = ", ") + } } }) diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R index d4bff44..523e07b 100644 --- a/R/splotch-estimate.R +++ b/R/splotch-estimate.R @@ -98,6 +98,7 @@ splotchEstMean <- function(norm.counts, params, verbose) { return(params) } +#' @importFrom stats density splotchEstLib <- function(counts, params, verbose) { if (verbose) {message("Estimating library size parameters...")} @@ -109,7 +110,10 @@ splotchEstLib <- function(counts, params, verbose) { lib.loc <- unname(fit$estimate["meanlog"]) lib.scale <- unname(fit$estimate["sdlog"]) - params <- setParams(params, lib.loc = lib.loc, lib.scale = lib.scale) + dens <- density(lib.sizes) + + params <- setParams(params, lib.loc = lib.loc, lib.scale = lib.scale, + lib.dens = dens) return(params) } diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index 3fa1e82..1148c61 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -242,10 +242,13 @@ splotchSimLibSizes <- function(sim, params, verbose) { if (verbose) {message("Simulating library sizes...")} nCells <- getParam(params, "nCells") - lib.loc <- getParam(params, "lib.loc") - lib.scale <- getParam(params, "lib.scale") + # lib.loc <- getParam(params, "lib.loc") + # lib.scale <- getParam(params, "lib.scale") + lib.dens <- getParam(params, "lib.dens") + + # exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) + exp.lib.sizes <- rejectionSample(nCells, lib.dens) - exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) colData(sim)$ExpLibSize <- exp.lib.sizes return(sim) @@ -347,3 +350,30 @@ getBetaStepProbs <- function(steps, alpha, beta) { return(probs) } + +#' @importFrom stats approxfun +rejectionSample <- function(n, dens, lower = 0) { + + xmin <- min(dens$x) + xmax <- max(dens$x) + ymin <- min(dens$y) + ymax <- max(dens$y) + + boundary <- approxfun(dens$x, dens$y) + + values <- c() + nsel <- 0 + + while(nsel < n) { + x <- runif(1e4, xmin, xmax) + y <- runif(1e4, ymin, ymax) + sel <- y < boundary(x) & x > lower + + nsel <- nsel + sum(sel) + values <- c(values, x[sel]) + } + + values <- values[1:n] + + return(values) +} diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index 904b42a..2c7861c 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -54,6 +54,8 @@ The Splotch simulation uses the following parameters: \item{\code{lib.scale}}{Scale (sdlog) parameter for the library size log-normal distribution, or sd parameter if a normal distribution is used.} + \item{\code{lib.dens}}{\code{\link{density}} object describing + the library size density.} } } \item{\emph{Paths parameters}}{ -- GitLab