diff --git a/DESCRIPTION b/DESCRIPTION index 237c3bec3679302ea79e73c46f04026962614159..a64f3c8c184299488d6b3cf24566e55e5e3d4586 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.3.9007 -Date: 2019-08-14 +Version: 1.9.3.9008 +Date: 2019-08-20 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 325e91f285453e3e86ecefbbd889e1ad468b2486..5b71bdc45865846aeb2fa9a084d4a0b4912f7d15 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +### Version 1.9.3.9008 (2019-08-20) + +* Add expression outliers to Splotch + ### Version 1.9.3.9007 (2019-08-14) * Fix bug in selectFit @@ -8,8 +12,8 @@ ### Version 1.9.3.9005 (2019-08-08) -* Simulate counts -* Split into separate functions for each stage +* Simulate counts in splotchSimulate +* Split splotchSimulate into separate functions for each stage ### Version 1.9.3.9004 (2019-08-08) @@ -21,17 +25,17 @@ ### Version 1.9.2.9003 (2019-07-17) -* Topologically sort paths -* Add library size parameters -* Simulate cell means +* Topologically sort Splotch paths +* Add library size parameters to SplotchParams +* Simulate cell means in splotchSimulate ### Version 1.9.2.9002 (2019-07-16) -* Add paths parameters +* Add paths parameters to SplotchParams ### Version 1.9.2.9001 (2019-07-16) -* Add mean parameters +* Add mean parameters to SplotchParams ### Version 1.9.2.9000 (2019-07-11) diff --git a/R/AllClasses.R b/R/AllClasses.R index e497256691177e8669471ef58dcaf57b0cebcf63..4124ac3fd8cfc0012ba40b3ba153f362cc675de4 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -273,6 +273,12 @@ setClass("SplatParams", #' distribution.} #' \item{\code{mean.rate}}{Rate parameter for the mean gamma #' distribution.} +#' \item{\code{mean.outProb}}{Probability that a gene is an +#' expression outlier.} +#' \item{\code{mean.outFacLoc}}{Location (meanlog) parameter for +#' the expression outlier factor log-normal distribution.} +#' \item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the +#' expression outlier factor log-normal distribution.} #' \item{\code{mean.values}}{Vector of means for each gene.} #' } #' } @@ -320,6 +326,9 @@ setClass("SplotchParams", contains = "Params", slots = c(mean.shape = "numeric", mean.rate = "numeric", + mean.outProb = "numeric", + mean.outLoc = "numeric", + mean.outScale = "numeric", mean.values = "numeric", network.graph = "ANY", network.nRegs = "numeric", @@ -332,6 +341,9 @@ setClass("SplotchParams", cells.design = "data.frame"), prototype = prototype(mean.rate = 0.3, mean.shape = 0.6, + mean.outProb = 0.05, + mean.outLoc = 4, + mean.outScale = 0.5, mean.values = numeric(), network.graph = NULL, network.nRegs = 100, diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index b673e030f68505cb73c7d507462cdeeba4194090..9b6e736ad000c7f73be0c2b93484eb5ed98127cc 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -22,6 +22,9 @@ setValidity("SplotchParams", function(object) { seed = checkmate::checkInt(v$seed, lower = 0), mean.rate = checkmate::checkNumber(v$mean.rate, lower = 0), mean.shape = checkmate::checkNumber(v$mean.shape, lower = 0), + mean.outProb = checkNumber(v$mean.outProb, lower = 0, upper = 1), + mean.outLoc = checkNumber(v$mean.outLoc), + mean.outScale = checkNumber(v$mean.outScale, lower = 0), network.graph = checkmate::checkClass(v$network, "igraph", null.ok = TRUE), network.nRegs = checkmate::checkInt(v$network.nRegs, @@ -115,9 +118,12 @@ setValidity("SplotchParams", function(object) { #' @importFrom methods show setMethod("show", "SplotchParams", function(object) { - pp.top <- list("Mean:" = c("(Rate)" = "mean.rate", - "(Shape)" = "mean.shape", - "[Values]*" = "mean.values")) + pp.top <- list("Mean:" = c("(Rate)" = "mean.rate", + "(Shape)" = "mean.shape", + "(Out Prob)" = "mean.outProb", + "(Out Location)" = "mean.outLoc", + "(Out Scale)" = "mean.outScale", + "[Values]*" = "mean.values")) pp.network <- list("Network:" = c("[Graph]" = "network.graph", "[nRegs]" = "network.nRegs", diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R index 7e3230f6e2b5cec2090593ecd84415c6be3e27bf..36a079216af2b9b338613e12e0e877fe1dd51c7d 100644 --- a/R/splotch-estimate.R +++ b/R/splotch-estimate.R @@ -72,6 +72,28 @@ splotchEstMean <- function(norm.counts, params, verbose) { params <- setParams(params, mean.shape = unname(fit$estimate["shape"]), mean.rate = unname(fit$estimate["rate"])) + if (verbose) {message("Estimating expression outlier parameters...")} + lmeans <- log(means) + med <- median(lmeans) + mad <- mad(lmeans) + + bound <- med + 1 * mad + + outs <- which(lmeans > bound) + + prob <- length(outs) / nrow(norm.counts) + + params <- setParam(params, "mean.outProb", prob) + + if (length(outs) > 1) { + facs <- means[outs] / median(means) + fit <- selectFit(facs, "lnorm", verbose = verbose) + + params <- setParams(params, + mean.outLoc = unname(fit$estimate["meanlog"]), + mean.outScale = unname(fit$estimate["sdlog"])) + } + return(params) } @@ -163,15 +185,15 @@ selectFit <- function(data, distr, weights = NULL, verbose = TRUE) { ) } - aics <- fitdistrplus::gofstat(fits)$aic - # Flatten in case aics is a list - aics.flat <- unlist(aics) - selected <- which(aics.flat == min(aics.flat, na.rm = TRUE)) + scores <- fitdistrplus::gofstat(fits)$cvm + # Flatten in case scores is a list + scores.flat <- unlist(scores) + selected <- which(scores.flat == min(scores.flat, na.rm = TRUE)) if (verbose) { - # Work around to get name in case aics is a list - name <- names(fits)[names(aics) == names(aics.flat)[selected]] - message("Selected ", name, " fit using AIC") + # Work around to get name in case scores is a list + name <- names(fits)[names(scores) == names(scores.flat)[selected]] + message("Selected ", name, " fit") } return(fits[[selected]]) diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index ad3f1a132dc821c3588c5741f26eac6f1b462b15..3fa1e829eab170ebf04e993679c0d0f3eb6ca44f 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -148,8 +148,19 @@ splotchSimGeneMeans <- function(params, verbose) { nGenes <- getParam(params, "nGenes") mean.shape <- getParam(params, "mean.shape") mean.rate <- getParam(params, "mean.rate") + mean.outProb <- getParam(params, "mean.outProb") + mean.outLoc <- getParam(params, "mean.outLoc") + mean.outScale <- getParam(params, "mean.outScale") mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate) + + outlier.facs <- getLNormFactors(nGenes, mean.outProb, 0, mean.outLoc, + mean.outScale) + median.means.gene <- median(mean.values) + outlier.means <- median.means.gene * outlier.facs + is.outlier <- outlier.facs != 1 + mean.values[is.outlier] <- outlier.means[is.outlier] + params <- setParam(params, "mean.values", mean.values) return(params) diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index 3c437d2a0e6203122d040fdeff9e74f96982e67b..904b42ad5703103a0ef480cc44535ce55343a673 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -23,6 +23,12 @@ The Splotch simulation uses the following parameters: distribution.} \item{\code{mean.rate}}{Rate parameter for the mean gamma distribution.} + \item{\code{mean.outProb}}{Probability that a gene is an + expression outlier.} + \item{\code{mean.outFacLoc}}{Location (meanlog) parameter for + the expression outlier factor log-normal distribution.} + \item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the + expression outlier factor log-normal distribution.} \item{\code{mean.values}}{Vector of means for each gene.} } }