From 7d1fcd34bef4c2aa9d11344d38285c8c9bc7ba51 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Thu, 22 Aug 2019 15:08:15 +1000 Subject: [PATCH] Add density sampling option for means, lib sizes --- DESCRIPTION | 4 +-- NEWS.md | 4 +++ R/AllClasses.R | 18 +++++++++++- R/SplotchParams-methods.R | 20 ++++++++++---- R/splotch-estimate.R | 8 ++++-- R/splotch-simulate.R | 58 +++++++++++++++++++++++++-------------- man/SplotchParams.Rd | 11 +++++++- 7 files changed, 92 insertions(+), 31 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b036793..1b17759 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.4.9009 -Date: 2019-08-21 +Version: 1.9.4.9010 +Date: 2019-08-22 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 8ccfac0..99438d9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +### Version 1.9.4.9010 (2019-08-22) + +* Add density sampling options for means and library sizes + ### Version 1.9.4.9009 (2019-08-21) * Replace library size log-normal with density and rejection sampling diff --git a/R/AllClasses.R b/R/AllClasses.R index 3af99fe..f14b964 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -279,7 +279,13 @@ setClass("SplatParams", #' the expression outlier factor log-normal distribution.} #' \item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the #' expression outlier factor log-normal distribution.} -#' \item{\code{mean.values}}{Vector of means for each gene.} +#' \item{\code{mean.dens}}{\code{\link{density}} object describing +#' the log gene mean density.} +#' \item{\code{[mean.method]}}{Method to use for simulating gene +#' means. Either "fit" to sample from a gamma distribution (with +#' expression outliers) or "density" to sample from the provided +#' density object.} +#' \item{\code{[mean.values]}}{Vector of means for each gene.} #' } #' } #' \item{\emph{Network parameters}}{ @@ -306,6 +312,9 @@ setClass("SplatParams", #' distribution is used.} #' \item{\code{lib.dens}}{\code{\link{density}} object describing #' the library size density.} +#' \item{\code{[lib.method]}}{Method to use for simulating library +#' sizes. Either "fit" to sample from a log-normal distribution or +#' "density" to sample from the provided density object.} #' } #' } #' \item{\emph{Paths parameters}}{ @@ -331,6 +340,8 @@ setClass("SplotchParams", mean.outProb = "numeric", mean.outLoc = "numeric", mean.outScale = "numeric", + mean.dens = "density", + mean.method = "character", mean.values = "numeric", network.graph = "ANY", network.nRegs = "numeric", @@ -341,12 +352,16 @@ setClass("SplotchParams", lib.loc = "numeric", lib.scale = "numeric", lib.dens = "density", + lib.method = "character", cells.design = "data.frame"), prototype = prototype(mean.rate = 0.3, mean.shape = 0.6, mean.outProb = 0.05, mean.outLoc = 4, mean.outScale = 0.5, + mean.dens = density(rgamma(10000, rate = 0.3, + shape = 0.6)), + mean.method = "fit", mean.values = numeric(), network.graph = NULL, network.nRegs = 100, @@ -361,6 +376,7 @@ setClass("SplotchParams", lib.loc = 11, lib.scale = 0.2, lib.dens = density(rlnorm(10000, 11, 0.2)), + lib.method = "fit", cells.design = data.frame( Path = 1, Probability = 1, diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index 6f4d13b..56a1b6c 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -25,6 +25,9 @@ setValidity("SplotchParams", function(object) { mean.outProb = checkNumber(v$mean.outProb, lower = 0, upper = 1), mean.outLoc = checkNumber(v$mean.outLoc), mean.outScale = checkNumber(v$mean.outScale, lower = 0), + mean.dens = checkmate::checkClass(v$mean.dens, "density"), + mean.method = checkmate::checkChoice(v$mean.method, + c("fit", "density")), network.graph = checkmate::checkClass(v$network, "igraph", null.ok = TRUE), network.nRegs = checkmate::checkInt(v$network.nRegs, @@ -40,6 +43,8 @@ setValidity("SplotchParams", function(object) { lib.loc = checkmate::checkNumber(v$lib.loc), lib.scale = checkmate::checkNumber(v$lib.scale, lower = 0), lib.dens = checkmate::checkClass(v$lib.dens, "density"), + lib.method = checkmate::checkChoice(v$lib.method, + c("fit", "density")), cells.design = checkmate::checkDataFrame(v$cells.design, types = "numeric", any.missing = FALSE, @@ -124,6 +129,8 @@ setMethod("show", "SplotchParams", function(object) { "(Out Prob)" = "mean.outProb", "(Out Location)" = "mean.outLoc", "(Out Scale)" = "mean.outScale", + "(Density)" = "mean.density", + "[Method]" = "mean.method", "[Values]*" = "mean.values")) pp.network <- list("Network:" = c("[Graph]" = "network.graph", @@ -135,7 +142,8 @@ setMethod("show", "SplotchParams", function(object) { pp.bot <- list("Library size:" = c("(Location)" = "lib.loc", "(Scale)" = "lib.scale", - "(Density)" = "lib.dens"), + "(Density)" = "lib.dens", + "[Method]" = "lib.method"), "Cells:" = c("[Design]" = "cells.design")) paths.means <- getParam(object, "paths.means") @@ -205,10 +213,12 @@ setMethod("setParam", "SplotchParams", function(object, name, value) { if (name == "network.graph") { checkmate::assertClass(value, "igraph") - object <- setParamUnchecked(object, "nGenes", igraph::gorder(value)) - if (!(length(getParam(object, "mean.values")) == 0)) { - warning("changing network.graph resets mean.values") - object <- setParam(object, "mean.values", numeric()) + if (getParam(object, "nGenes") != igraph::gorder(value)) { + if (!(length(getParam(object, "mean.values")) == 0)) { + warning("changing network.graph resets mean.values") + object <- setParam(object, "mean.values", numeric()) + } + object <- setParamUnchecked(object, "nGenes", igraph::gorder(value)) } if ("IsReg" %in% igraph::vertex_attr_names(value)) { diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R index 523e07b..cae0f88 100644 --- a/R/splotch-estimate.R +++ b/R/splotch-estimate.R @@ -78,7 +78,7 @@ splotchEstMean <- function(norm.counts, params, verbose) { med <- median(lmeans) mad <- mad(lmeans) - bound <- med + 1 * mad + bound <- med + 2 * mad outs <- which(lmeans > bound) @@ -88,13 +88,16 @@ splotchEstMean <- function(norm.counts, params, verbose) { if (length(outs) > 1) { facs <- means[outs] / median(means) - fit <- selectFit(facs, "lnorm", verbose = verbose) + #fit <- selectFit(facs, "lnorm", verbose = verbose) + fit <- fitdistrplus::fitdist(facs, "lnorm") params <- setParams(params, mean.outLoc = unname(fit$estimate["meanlog"]), mean.outScale = unname(fit$estimate["sdlog"])) } + params <- setParams(params, mean.dens = density(lmeans)) + return(params) } @@ -191,6 +194,7 @@ selectFit <- function(data, distr, weights = NULL, verbose = TRUE) { } scores <- fitdistrplus::gofstat(fits)$cvm + # Flatten in case scores is a list scores.flat <- unlist(scores) selected <- which(scores.flat == min(scores.flat, na.rm = TRUE)) diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index 1148c61..5632389 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -146,20 +146,30 @@ splotchSimGeneMeans <- function(params, verbose) { if (verbose) {message("Simulating means...")} nGenes <- getParam(params, "nGenes") - mean.shape <- getParam(params, "mean.shape") - mean.rate <- getParam(params, "mean.rate") - mean.outProb <- getParam(params, "mean.outProb") - mean.outLoc <- getParam(params, "mean.outLoc") - mean.outScale <- getParam(params, "mean.outScale") - - mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate) - - outlier.facs <- getLNormFactors(nGenes, mean.outProb, 0, mean.outLoc, - mean.outScale) - median.means.gene <- median(mean.values) - outlier.means <- median.means.gene * outlier.facs - is.outlier <- outlier.facs != 1 - mean.values[is.outlier] <- outlier.means[is.outlier] + mean.method <- getParam(params, "mean.method") + + if (mean.method == "fit") { + if (verbose) {message("Sampling from gamma distribution...")} + mean.shape <- getParam(params, "mean.shape") + mean.rate <- getParam(params, "mean.rate") + mean.outProb <- getParam(params, "mean.outProb") + mean.outLoc <- getParam(params, "mean.outLoc") + mean.outScale <- getParam(params, "mean.outScale") + + mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate) + + outlier.facs <- getLNormFactors(nGenes, mean.outProb, 0, mean.outLoc, + mean.outScale) + median.means.gene <- median(mean.values) + outlier.means <- median.means.gene * outlier.facs + is.outlier <- outlier.facs != 1 + mean.values[is.outlier] <- outlier.means[is.outlier] + } else if (mean.method == "density") { + if (verbose) {message("Sampling from density object...")} + mean.dens <- getParam(params, "mean.dens") + + mean.values <- exp(sampleDensity(nGenes, mean.dens, lower = -Inf)) + } params <- setParam(params, "mean.values", mean.values) @@ -242,12 +252,20 @@ splotchSimLibSizes <- function(sim, params, verbose) { if (verbose) {message("Simulating library sizes...")} nCells <- getParam(params, "nCells") - # lib.loc <- getParam(params, "lib.loc") - # lib.scale <- getParam(params, "lib.scale") - lib.dens <- getParam(params, "lib.dens") + lib.method <- getParam(params, "lib.method") - # exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) - exp.lib.sizes <- rejectionSample(nCells, lib.dens) + if (lib.method == "fit") { + if (verbose) {message("Sampling from log-normal distribution...")} + lib.loc <- getParam(params, "lib.loc") + lib.scale <- getParam(params, "lib.scale") + + exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) + } else if (lib.method == "density") { + if (verbose) {message("Sampling from density object...")} + lib.dens <- getParam(params, "lib.dens") + + exp.lib.sizes <- sampleDensity(nCells, lib.dens) + } colData(sim)$ExpLibSize <- exp.lib.sizes @@ -352,7 +370,7 @@ getBetaStepProbs <- function(steps, alpha, beta) { } #' @importFrom stats approxfun -rejectionSample <- function(n, dens, lower = 0) { +sampleDensity <- function(n, dens, lower = 0) { xmin <- min(dens$x) xmax <- max(dens$x) diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index 2c7861c..0b03061 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -29,7 +29,13 @@ The Splotch simulation uses the following parameters: the expression outlier factor log-normal distribution.} \item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the expression outlier factor log-normal distribution.} - \item{\code{mean.values}}{Vector of means for each gene.} + \item{\code{mean.dens}}{\code{\link{density}} object describing + the log gene mean density.} + \item{\code{[mean.method]}}{Method to use for simulating gene + means. Either "fit" to sample from a gamma distribution (with + expression outliers) or "density" to sample from the provided + density object.} + \item{\code{[mean.values]}}{Vector of means for each gene.} } } \item{\emph{Network parameters}}{ @@ -56,6 +62,9 @@ The Splotch simulation uses the following parameters: distribution is used.} \item{\code{lib.dens}}{\code{\link{density}} object describing the library size density.} + \item{\code{[lib.method]}}{Method to use for simulating library + sizes. Either "fit" to sample from a log-normal distribution or + "density" to sample from the provided density object.} } } \item{\emph{Paths parameters}}{ -- GitLab