Skip to content
Snippets Groups Projects
Commit dc9f6823 authored by Luke Zappia's avatar Luke Zappia
Browse files

Add expression outliers to Splotch

parent a3d19d22
No related branches found
No related tags found
No related merge requests found
Package: splatter
Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 1.9.3.9007
Date: 2019-08-14
Version: 1.9.3.9008
Date: 2019-08-20
Author: Luke Zappia
Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"),
......
### Version 1.9.3.9008 (2019-08-20)
* Add expression outliers to Splotch
### Version 1.9.3.9007 (2019-08-14)
* Fix bug in selectFit
......@@ -8,8 +12,8 @@
### Version 1.9.3.9005 (2019-08-08)
* Simulate counts
* Split into separate functions for each stage
* Simulate counts in splotchSimulate
* Split splotchSimulate into separate functions for each stage
### Version 1.9.3.9004 (2019-08-08)
......@@ -21,17 +25,17 @@
### Version 1.9.2.9003 (2019-07-17)
* Topologically sort paths
* Add library size parameters
* Simulate cell means
* Topologically sort Splotch paths
* Add library size parameters to SplotchParams
* Simulate cell means in splotchSimulate
### Version 1.9.2.9002 (2019-07-16)
* Add paths parameters
* Add paths parameters to SplotchParams
### Version 1.9.2.9001 (2019-07-16)
* Add mean parameters
* Add mean parameters to SplotchParams
### Version 1.9.2.9000 (2019-07-11)
......
......@@ -273,6 +273,12 @@ setClass("SplatParams",
#' distribution.}
#' \item{\code{mean.rate}}{Rate parameter for the mean gamma
#' distribution.}
#' \item{\code{mean.outProb}}{Probability that a gene is an
#' expression outlier.}
#' \item{\code{mean.outFacLoc}}{Location (meanlog) parameter for
#' the expression outlier factor log-normal distribution.}
#' \item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the
#' expression outlier factor log-normal distribution.}
#' \item{\code{mean.values}}{Vector of means for each gene.}
#' }
#' }
......@@ -320,6 +326,9 @@ setClass("SplotchParams",
contains = "Params",
slots = c(mean.shape = "numeric",
mean.rate = "numeric",
mean.outProb = "numeric",
mean.outLoc = "numeric",
mean.outScale = "numeric",
mean.values = "numeric",
network.graph = "ANY",
network.nRegs = "numeric",
......@@ -332,6 +341,9 @@ setClass("SplotchParams",
cells.design = "data.frame"),
prototype = prototype(mean.rate = 0.3,
mean.shape = 0.6,
mean.outProb = 0.05,
mean.outLoc = 4,
mean.outScale = 0.5,
mean.values = numeric(),
network.graph = NULL,
network.nRegs = 100,
......
......@@ -22,6 +22,9 @@ setValidity("SplotchParams", function(object) {
seed = checkmate::checkInt(v$seed, lower = 0),
mean.rate = checkmate::checkNumber(v$mean.rate, lower = 0),
mean.shape = checkmate::checkNumber(v$mean.shape, lower = 0),
mean.outProb = checkNumber(v$mean.outProb, lower = 0, upper = 1),
mean.outLoc = checkNumber(v$mean.outLoc),
mean.outScale = checkNumber(v$mean.outScale, lower = 0),
network.graph = checkmate::checkClass(v$network, "igraph",
null.ok = TRUE),
network.nRegs = checkmate::checkInt(v$network.nRegs,
......@@ -115,9 +118,12 @@ setValidity("SplotchParams", function(object) {
#' @importFrom methods show
setMethod("show", "SplotchParams", function(object) {
pp.top <- list("Mean:" = c("(Rate)" = "mean.rate",
"(Shape)" = "mean.shape",
"[Values]*" = "mean.values"))
pp.top <- list("Mean:" = c("(Rate)" = "mean.rate",
"(Shape)" = "mean.shape",
"(Out Prob)" = "mean.outProb",
"(Out Location)" = "mean.outLoc",
"(Out Scale)" = "mean.outScale",
"[Values]*" = "mean.values"))
pp.network <- list("Network:" = c("[Graph]" = "network.graph",
"[nRegs]" = "network.nRegs",
......
......@@ -72,6 +72,28 @@ splotchEstMean <- function(norm.counts, params, verbose) {
params <- setParams(params, mean.shape = unname(fit$estimate["shape"]),
mean.rate = unname(fit$estimate["rate"]))
if (verbose) {message("Estimating expression outlier parameters...")}
lmeans <- log(means)
med <- median(lmeans)
mad <- mad(lmeans)
bound <- med + 1 * mad
outs <- which(lmeans > bound)
prob <- length(outs) / nrow(norm.counts)
params <- setParam(params, "mean.outProb", prob)
if (length(outs) > 1) {
facs <- means[outs] / median(means)
fit <- selectFit(facs, "lnorm", verbose = verbose)
params <- setParams(params,
mean.outLoc = unname(fit$estimate["meanlog"]),
mean.outScale = unname(fit$estimate["sdlog"]))
}
return(params)
}
......@@ -163,15 +185,15 @@ selectFit <- function(data, distr, weights = NULL, verbose = TRUE) {
)
}
aics <- fitdistrplus::gofstat(fits)$aic
# Flatten in case aics is a list
aics.flat <- unlist(aics)
selected <- which(aics.flat == min(aics.flat, na.rm = TRUE))
scores <- fitdistrplus::gofstat(fits)$cvm
# Flatten in case scores is a list
scores.flat <- unlist(scores)
selected <- which(scores.flat == min(scores.flat, na.rm = TRUE))
if (verbose) {
# Work around to get name in case aics is a list
name <- names(fits)[names(aics) == names(aics.flat)[selected]]
message("Selected ", name, " fit using AIC")
# Work around to get name in case scores is a list
name <- names(fits)[names(scores) == names(scores.flat)[selected]]
message("Selected ", name, " fit")
}
return(fits[[selected]])
......
......@@ -148,8 +148,19 @@ splotchSimGeneMeans <- function(params, verbose) {
nGenes <- getParam(params, "nGenes")
mean.shape <- getParam(params, "mean.shape")
mean.rate <- getParam(params, "mean.rate")
mean.outProb <- getParam(params, "mean.outProb")
mean.outLoc <- getParam(params, "mean.outLoc")
mean.outScale <- getParam(params, "mean.outScale")
mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate)
outlier.facs <- getLNormFactors(nGenes, mean.outProb, 0, mean.outLoc,
mean.outScale)
median.means.gene <- median(mean.values)
outlier.means <- median.means.gene * outlier.facs
is.outlier <- outlier.facs != 1
mean.values[is.outlier] <- outlier.means[is.outlier]
params <- setParam(params, "mean.values", mean.values)
return(params)
......
......@@ -23,6 +23,12 @@ The Splotch simulation uses the following parameters:
distribution.}
\item{\code{mean.rate}}{Rate parameter for the mean gamma
distribution.}
\item{\code{mean.outProb}}{Probability that a gene is an
expression outlier.}
\item{\code{mean.outFacLoc}}{Location (meanlog) parameter for
the expression outlier factor log-normal distribution.}
\item{\code{mean.outFacScale}}{Scale (sdlog) parameter for the
expression outlier factor log-normal distribution.}
\item{\code{mean.values}}{Vector of means for each gene.}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment