Skip to content
Snippets Groups Projects
Commit 371a3c8c authored by Luke Zappia's avatar Luke Zappia
Browse files

Merge branch 'dropout'

* dropout:
  Use development version
  Run checks
  Add group as a dropout.type option
  Add dropout.type tests
  Add dropout.type to splatSimulate
  Change dropout.present to dropout.type
parents 496c219f cee23462
No related branches found
No related tags found
No related merge requests found
Package: splatter
Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 1.3.4
Version: 1.3.4.9000
Date: 2018-04-19
Author: Luke Zappia
Authors@R:
......
......@@ -13,6 +13,11 @@
* Add option to use a normal distribution for library sizes in Splat simulations
### Version 1.3.3.9000 (2018-04-12)
* Replace dropout.present with dropout.type in SplatParams
* Allows more control over dropout.mid and dropout.shape
## Version 1.3.3 (2018-03-27)
* Fix parameter passing bug in scDDEstimate
......
......@@ -146,8 +146,12 @@ setClass("SimpleParams",
#' }
#' \item{\emph{Dropout parameters}}{
#' \describe{
#' \item{\code{dropout.present}}{Logical. Whether to simulate
#' dropout.}
#' \item{\code{dropout.type}}{The type of dropout to simulate.
#' "none" indicates no dropout, "experiment" is global dropout using
#' the same parameters for every cell, "batch" uses the same
#' parameters for every cell in each batch, "group" uses the same
#' parameters for every cell in each groups and "cell" uses a
#' different set of parameters for each cell.}
#' \item{\code{dropout.mid}}{Midpoint parameter for the dropout
#' logistic function.}
#' \item{\code{dropout.shape}}{Shape parameter for the dropout
......@@ -213,7 +217,7 @@ setClass("SplatParams",
de.facScale = "numeric",
bcv.common = "numeric",
bcv.df = "numeric",
dropout.present = "logical",
dropout.type = "character",
dropout.mid = "numeric",
dropout.shape = "numeric",
path.from = "numeric",
......@@ -241,7 +245,7 @@ setClass("SplatParams",
de.facScale = 0.4,
bcv.common = 0.1,
bcv.df = 60,
dropout.present = FALSE,
dropout.type = "none",
dropout.mid = 0,
dropout.shape = -1,
path.from = 0,
......
......@@ -46,9 +46,12 @@ setValidity("SplatParams", function(object) {
len = nGroups),
bcv.common = checkNumber(v$bcv.common, lower = 0),
bcv.df = checkNumber(v$bcv.df, lower = 0),
dropout.present = checkFlag(v$dropout.present),
dropout.mid = checkNumber(v$dropout.mid),
dropout.shape = checkNumber(v$dropout.shape),
dropout.type = checkCharacter(v$dropout.type, len = 1,
any.missing = FALSE),
dropout.mid = checkNumeric(v$dropout.mid, finite = TRUE,
any.missing = FALSE, min.len = 1),
dropout.shape = checkNumeric(v$dropout.shape, finite = TRUE,
any.missing = FALSE, min.len = 1),
path.from = checkIntegerish(v$path.from, lower = 0,
upper = nGroups, len = nGroups),
path.length = checkIntegerish(v$path.length, lower = 1,
......@@ -75,7 +78,15 @@ setValidity("SplatParams", function(object) {
if (!(0 %in% v$path.from)) {
checks <- c(checks, path.from = "origin must be specified in path.from")
} else if (any(v$path.from == seq_len(nGroups))) {
checks <- c(checks, stop("path cannot begin at itself"))
checks <- c(checks, "path cannot begin at itself")
}
# Check dropout type
if (!(v$dropout.type %in%
c("none", "experiment", "batch", "group", "cell"))) {
checks <- c(checks,
paste("dropout.type must be one of: 'none', 'experiment',",
"'batch', 'group', 'cell'"))
}
if (all(checks == TRUE)) {
......@@ -114,6 +125,45 @@ setMethod("setParam", "SplatParams",function(object, name, value) {
}
}
if (name == "dropout.type") {
mid.len <- length(getParam(object, "dropout.mid"))
mid.shape <- length(getParam(object, "dropout.shape"))
if ((value == "experiment")) {
if ((mid.len != 1) | (mid.shape != 1)) {
stop("dropout.type cannot be set to 'experiment' because ",
"dropout.mid and dropout.shape aren't length 1, ",
"set dropout.mid and dropout.shape first")
}
}
if ((value == "batch")) {
n <- getParam(object, "nBatches")
if ((mid.len != n) | (mid.shape != n)) {
stop("dropout.type cannot be set to 'batch' because ",
"dropout.mid and dropout.shape aren't length equal to ",
"nBatches (", n, "), set dropout.mid and dropout.shape ",
"first")
}
}
if ((value == "group")) {
n <- getParam(object, "nGroups")
if ((mid.len != n) | (mid.shape != n)) {
stop("dropout.type cannot be set to 'group' because ",
"dropout.mid and dropout.shape aren't length equal to ",
"nGroups (", n, "), set dropout.mid and dropout.shape ",
"first")
}
}
if ((value == "cell")) {
n <- getParam(object, "nCells")
if ((mid.len != n) | (mid.shape != n)) {
stop("dropout.type cannot be set to 'cell' because ",
"dropout.mid and dropout.shape aren't length equal to ",
"nCells (", n, "), set dropout.mid and dropout.shape ",
"first")
}
}
}
object <- callNextMethod()
return(object)
......@@ -142,7 +192,7 @@ setMethod("show", "SplatParams", function(object) {
"[Scale]" = "de.facScale"),
"BCV:" = c("(Common Disp)" = "bcv.common",
"(DoF)" = "bcv.df"),
"Dropout:" = c("[Present]" = "dropout.present",
"Dropout:" = c("[Type]" = "dropout.type",
"(Midpoint)" = "dropout.mid",
"(Shape)" = "dropout.shape"),
"Paths:" = c("[From]" = "path.from",
......
......@@ -361,7 +361,7 @@ splatSimBatchEffects <- function(sim, params) {
#'
#' @return SingleCellExperiment with simulated batch means.
#'
#' @importFrom SummarizedExperiment rowData rowData<-
#' @importFrom SummarizedExperiment rowData rowData<- colData
splatSimBatchCellMeans <- function(sim, params) {
nBatches <- getParam(params, "nBatches")
......@@ -707,22 +707,73 @@ splatSimTrueCounts <- function(sim, params) {
#' @importFrom stats rbinom
splatSimDropout <- function(sim, params) {
dropout.present <- getParam(params, "dropout.present")
dropout.type <- getParam(params, "dropout.type")
true.counts <- assays(sim)$TrueCounts
dropout.mid <- getParam(params, "dropout.mid")
dropout.shape <- getParam(params, "dropout.shape")
cell.names <- colData(sim)$Cell
gene.names <- rowData(sim)$Gene
nCells <- getParam(params, "nCells")
nGenes <- getParam(params, "nGenes")
nBatches <- getParam(params, "nBatches")
nGroups <- getParam(params, "nGroups")
cell.means <- assays(sim)$CellMeans
if (dropout.present) {
cell.names <- colData(sim)$Cell
gene.names <- rowData(sim)$Gene
nCells <- getParam(params, "nCells")
nGenes <- getParam(params, "nGenes")
dropout.mid <- getParam(params, "dropout.mid")
dropout.shape <- getParam(params, "dropout.shape")
cell.means <- assays(sim)$CellMeans
switch(dropout.type,
experiment = {
if ((length(dropout.mid) != 1) || length(dropout.shape) != 1) {
stop("dropout.type is set to 'experiment' but dropout.mid ",
"and dropout.shape aren't length 1")
}
dropout.mid <- rep(dropout.mid, nCells)
dropout.shape <- rep(dropout.shape, nCells)
},
batch = {
if ((length(dropout.mid) != nBatches) ||
length(dropout.shape) != nBatches) {
stop("dropout.type is set to 'batch' but dropout.mid ",
"and dropout.shape aren't length equal to nBatches ",
"(", nBatches, ")")
}
batches <- as.numeric(factor(colData(sim)$Batch))
dropout.mid <- dropout.mid[batches]
dropout.shape <- dropout.shape[batches]
},
group = {
if ((length(dropout.mid) != nGroups) ||
length(dropout.shape) != nGroups) {
stop("dropout.type is set to 'group' but dropout.mid ",
"and dropout.shape aren't length equal to nGroups ",
"(", nGroups, ")")
}
if ("Group" %in% colnames(colData(sim))) {
groups <- as.numeric(factor(colData(sim)$Group))
} else {
stop("dropout.type is set to 'group' but groups have not ",
"been simulated")
}
dropout.mid <- dropout.mid[groups]
dropout.shape <- dropout.shape[groups]
},
cell = {
if ((length(dropout.mid) != nCells) ||
length(dropout.shape) != nCells) {
stop("dropout.type is set to 'cell' but dropout.mid ",
"and dropout.shape aren't length equal to nCells ",
"(", nCells, ")")
}
})
if (dropout.type != "none") {
# Generate probabilites based on expression
drop.prob <- sapply(seq_len(nCells), function(idx) {
eta <- log(cell.means[, idx])
return(logistic(eta, x0 = dropout.mid, k = dropout.shape))
return(logistic(eta, x0 = dropout.mid[idx], k = dropout.shape[idx]))
})
# Decide which counts to keep
......
......@@ -90,8 +90,12 @@ The Splatter simulation requires the following parameters:
}
\item{\emph{Dropout parameters}}{
\describe{
\item{\code{dropout.present}}{Logical. Whether to simulate
dropout.}
\item{\code{dropout.type}}{The type of dropout to simulate.
"none" indicates no dropout, "experiment" is global dropout using
the same parameters for every cell, "batch" uses the same
parameters for every cell in each batch, "group" uses the same
parameters for every cell in each groups and "cell" uses a
different set of parameters for each cell.}
\item{\code{dropout.mid}}{Midpoint parameter for the dropout
logistic function.}
\item{\code{dropout.shape}}{Shape parameter for the dropout
......
......@@ -34,3 +34,13 @@ test_that("path.from checks work", {
expect_warning(setParam(pp, "group.prob", 1),
"nGroups has changed, resetting path.from")
})
test_that("dropout.type checks work", {
expect_error(setParam(params, "dropout.type", "cell"),
"dropout.type cannot be set to 'cell'")
pp <- setParams(params, dropout.mid = rep(1, 100),
dropout.shape = rep(1, 100))
expect_silent(setParam(pp, "dropout.type", "cell"))
expect_error(setParam(params, "dropout.type", "a"),
"dropout.type must be one of: ")
})
context("Splatter simulations")
context("Splat simulations")
test.params <- newSplatParams(nGenes = 100, batchCells = c(5, 5),
group.prob = c(0.5, 0.5), lib.scale = 0)
test_that("splatSimulate output is valid", {
expect_true(validObject(splatSimulate(test.params, method = "single",
dropout.present = TRUE)))
expect_true(validObject(splatSimulate(test.params, method = "single")))
expect_true(validObject(splatSimulate(test.params, method = "groups")))
expect_true(validObject(splatSimulate(test.params, method = "paths",
path.from = c(0, 1))))
......@@ -23,3 +22,14 @@ test_that("infinite bcv.df is detected", {
expect_warning(splatSimulate(test.params, bcv.df = Inf),
"'bcv.df' is infinite. This parameter will be ignored.")
})
test_that("dropout.type checks work", {
pp <- setParams(test.params, dropout.type = "experiment")
expect_true(validObject(splatSimulate(pp, method = "single")))
pp <- setParams(pp, dropout.mid = 1:2)
expect_error(splatSimulate(pp), "aren't length 1")
pp <- setParams(test.params, group.prob = c(0.5, 0.5),
dropout.mid = c(1, 2), dropout.shape = c(-1, -0.5),
dropout.type = "group")
expect_error(splatSimulate(pp), "groups have not been simulated")
})
......@@ -56,7 +56,7 @@ data("sc_example_counts")
# Estimate parameters from example data
params <- splatEstimate(sc_example_counts)
# Simulate data using estimated parameters
sim <- splatSimulate(params, dropout.present = FALSE)
sim <- splatSimulate(params)
```
These steps will be explained in detail in the following sections but briefly
......@@ -131,7 +131,7 @@ The parameters required for the Splat simulation are briefly described here:
* `bcv.df` - Degrees of Freedom for the BCV inverse chi-squared
distribution.
* **Dropout parameters**
* `dropout.present` - Logical. Whether to simulate dropout.
* `dropout.type` - Type of dropout to simulate.
* `dropout.mid` - Midpoint parameter for the dropout logistic function.
* `dropout.shape` - Shape parameter for the dropout logistic function.
* **Differentiation path parameters**
......@@ -249,7 +249,7 @@ can provide them as additional arguments, alternatively if we don't supply any
parameters the defaults will be used:
```{r splatSimulate}
sim <- splatSimulate(params, nGenes = 1000, dropout.present = FALSE, seed = 0)
sim <- splatSimulate(params, nGenes = 1000)
sim
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment