Newer
Older
#' Create splatParams object
#'
#' S3 class for holding Splatter simulation parameters.
#'
#' @param ... parameters to set in the new params object, passed to
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#'
#' @details
#' The splatParams object is a list based S3 object for holding simulation
#' parameters. It has the following sections and values:
#'
#' \itemize{
#' \item nGenes - Number of genes to simulate.
#' \item nCells - Number of cells to simulate.
#' \item [groupCells] - Vector giving the number of cells in each simulation
#' group/path.
#' \item mean (mean parameters)
#' \itemize{
#' \item rate - Rate parameter for the mean gamma simulation.
#' \item shape - Shape parameter for the mean gamma simulation.
#' }
#' \item lib (library size parameters)
#' \itemize{
#' \item loc - Location (meanlog) parameter for the library size
#' log-normal distribution.
#' \item scale - Scale (sdlog) parameter for the library size log-normal
#' distribution.
#' }
#' \item out (expression outlier parameters)
#' \itemize{
#' \item prob - Probability that a gene is an expression outlier.
#' \item loProb - Probability that an expression outlier gene is lowly
#' expressed.
#' \item facLoc - Location (meanlog) parameter for the expression outlier
#' factor log-normal distribution.
#' \item facScale - Scale (sdlog) parameter for the expression outlier
#' factor log-normal distribution.
#' }
#' \item de (differential expression parameters)
#' \itemize{
#' \item [prob] - Probability that a gene is differentially expressed
#' between groups or paths.
#' \item [downProb] - Probability that differentially expressed gene is
#' down-regulated.
#' \item [facLoc] - Location (meanlog) parameter for the differential
#' expression factor log-normal distribution.
#' \item [facScale] - Scale (sdlog) parameter for the differential
#' expression factor log-normal distribution.
#' }
#' \item bcv (Biological Coefficient of Variation parameters)
#' \itemize{
#' \item common - Underlying common dispersion across all genes.
#' \item DF - Degrees of Freedom for the BCV inverse chi-squared
#' distribution.
#' }
#' \item dropout (dropout parameters)
#' \itemize{
#' \item present - Logical. Whether to simulate dropout.
#' \item mid - Midpoint parameter for the dropout logistic function.
#' \item shape - Shape parameter for the dropout logistic function.
#' }
#' \item path (differentiation path parameters)
#' \itemize{
#' \item [from] - Vector giving the originating point of each path. This
#' allows path structure such as a cell type which differentiates
#' into an intermediate cell type that then differentiates into two
#' mature cell types. A path structure of this form would have a
#' "from" parameter of c(0, 1, 1) (where 0 is the origin). If no
#' vector is given all paths will start at the origin.
#' \item [length] - Vector giving the number of steps to simulate along
#' each path. If a single value is given it will be applied to all
#' paths.
#' \item [skew] - Vector giving the skew of each path. Values closer to 1
#' will give more cells towards the starting population, values
#' closer to 0 will give more cells towards the final population.
#' If a single value is given it will be applied to all paths.
#' \item [nonlinearProb] - Probability that a gene follows a non-linear
#' path along the differentiation path. This allows more complex
#' gene patterns such as a gene being equally expressed at the
#' beginning an end of a path but lowly expressed in the middle.
#' \item [sigmaFac] - Sigma factor for non-linear gene paths. A higher
#' value will result in more extreme non-linear variations along a
#' path.
#' }
#' }
#'
#' Those shown in brackets cannot currently be easily estimated from a real
#' dataset by Splatter. This is also shown when a splatParams object is printed
#' with parameters labelled as either (estimatable) or [not estimatable].
#'
#' @return List based S3 splatParams object
#' @examples
#' params <- splatParams()
#' params
#' @export
splatParams <- function(...) {
params <- list(nGenes = NA, nCells = NA, groupCells = NA,
mean = list(rate = NA, shape = NA),
lib = list(loc = NA, scale = NA),
out = list(prob = NA, loProb = NA, facLoc = NA,
facScale = NA),
de = list(prob = NA, downProb = NA, facLoc = NA,
facScale = NA),
bcv = list(common = NA, DF = NA),
dropout = list(present = NA, mid = NA, shape = NA),
path = list(from = NA, length = NA, skew = NA,
nonlinearProb = NA, sigmaFac = NA))
class(params) <- "splatParams"
params <- setParams(params, ...)
return(params)
}
#' Print splatParams object
#'
#' Pretty print the parameters stored in a splatParams object. Parameters are
#' labelled as either (estimatable) or [not estimatable].
#'
#' @param x splatParams object to print.
#' @param ... further arguments passed to or from other methods.
#'
#' @return Invisibly returns x (via \code{\link{invisible}(x)})
#'
#' @examples
#' params <- defaultParams()
#' print(params)
#' @export
print.splatParams <- function(x, ...) {
pp <- list("Global:" = c("(Genes)" = "nGenes",
"(Cells)" = "nCells",
"[Group Cells]" = "groupCells"),
"Mean:" = c("(Rate)" = "mean.rate",
"(Shape)" = "mean.shape"),
"Library size:" = c("(Location)" = "lib.loc",
"(Scale)" = "lib.scale"),
"Exprs outliers:" = c("(Probability)" = "out.prob",
"(Lo Prob)" = "out.loProb",
"(Location)" = "out.facLoc",
"(Scale)" = "out.facScale"),
"Differential exprs:" = c("[Probability]" = "de.prob",
"[Down Prob]" = "de.downProb",
"[Location]" = "de.facLoc",
"[Scale]" = "de.facScale"),
"BCV:" = c("(Common Disp)" = "bcv.common",
"(DoF)" = "bcv.DF"),
"Dropout:" = c("(Present T/F)" = "dropout.present",
"(Midpoint)" = "dropout.mid",
"(Shape)" = "dropout.shape"),
"Paths:" = c("[From]" = "path.from",
"[Length]" = "path.length",
"[Skew]" = "path.skew",
"[Non-linear]" = "path.nonlinearProb",
"[Sigma Factor]" = "path.sigmaFac"))
parameters <- getParams(x, pp[[category]])
parameters <- sapply(parameters, paste, collapse = ", ")
names(parameters) <- names(pp[[category]])
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#' Update a splatParams object
#'
#' Set any of the parameters in a splatParams object to have a new value.
#'
#' @param params the splatParams object to update.
#' @param ... Any parameters to set.
#'
#' @details
#' This function allows multiple parameters to be updated or set using a single
#' simple function call. Parameters to update are specified by supplying
#' additional arguments that follow the levels of the splatParams data structure
#' separated by the "." character. For example
#' \code{setParams(params, nGenes = 100)} is equivalent to
#' \code{params$nGenes <- 100} and \code{update(params, mean.rate = 1)} is
#' equivalent to \code{params$mean$rate <- 1}. For more details of the available
#' parameters and the splatParams data structure see \code{\link{splatParams}}.
#'
#' @return splatParms object with updated parameters
#' @examples
#' params <- defaultParams()
#' params
#' # Set nGenes and nCells
#' params <- setParams(params, nGenes = 1000, nCells = 200)
#' params
#' # Set mean rate paramater and library size location parameter
#' params <- setParams(params, mean.rate = 1, lib.loc = 12)
#' params
#' @export
setParams <- function(params, ...) {
update <- list(...)
if (length(update) == 0) {
return(params)
}
names <- strsplit(names(update), ".", fixed = TRUE)
for (idx in seq_along(names)) {
name <- names[[idx]]
value <- update[[idx]]
if (length(name) == 1) {
params[[name]] <- value
} else {
params[[name[1]]][[name[2]]] <- value
}
}
return(params)
}
#' Get parameters from splatParams object
#'
#' Get values for the parameters in a splatParams object. Uses the same pattern
#' (category.parameter) as \code{\link{setParams}}.
#'
#' @param params splatParams object to get parameters from.
#' @param names vector of parameter names to extract.
#'
#' @return Vector if all selected parameters are single values, otherwise a
#' list.
#' @examples
#' params <- defaultParams()
#' # Get the number of genes
#' getParams(params, "nGenes")
#' # Get the number of genes and the mean rate parameter
#' getParams(params, c("nGenes", "mean.rate"))
#' # Returns a list if one of the selected parameters is a vector
#' params <- setParams(params, groupCells = c(100, 200))
#' @export
getParams <- function(params, names) {
if (length(names) == 0) {
return(NULL)
}
output <- list()
keep.list <- FALSE
for (idx in seq_along(names)) {
name <- names[[idx]]
name.split <- strsplit(name, ".", fixed = TRUE)[[1]]
if (length(name) == 1) {
value <- params[[name.split]]
} else {
value <- params[[name.split[1]]][[name.split[2]]]
}
output[[name]] <- value
if (length(value) > 1) {
keep.list <- TRUE
}
}
if (!keep.list) {
output <- unlist(output)
} else if (length(output) == 1) {
output <- output[[1]]
}
return(output)
}
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
#' Check splatParams object
#'
#' Check that a splatParams object has valid parameter values.
#'
#' @param params splatParams object to check
#'
#' @details
#' The following checks are made:
#' \itemize{
#' \item{Input has "splatParams" class}
#' \item{Logical parameters are logical}
#' \item{Numeric parameters are numeric}
#' \item{Positive numeric parameters are positive}
#' \item{Integer parameters are integers}
#' \item{Probability parameters are in the range 0-1}
#' \item{Vector parameters are the correct length}
#' \item{Vector parameters do not contain NAs}
#' \item{Non-vector parameters are single values}
#' }
#'
#' @return Produces error if not valid otherwise nothing
#' @examples
#' checkParams(defaultParams())
#' @export
checkParams <- function(params) {
# Check class before anything else
if (!("splatParams" %in% class(params))) {
stop("params does not belong to the splatParams class")
}
# Define what values each parameter can take
# NUM = Numeric
# POS = Positive numeric
# INT = Positive integer
# PROB = Positive numeric in range 0-1
# LOG = Logical
types <- c(nGenes = "INT", nCells = "INT", groupCells = "INT",
mean.rate = "POS", mean.shape = "POS", lib.loc = "NUM",
lib.scale = "POS", out.prob = "PROB", out.loProb = "PROB",
out.facLoc = "NUM", out.facScale = "POS", de.prob = "PROB",
de.downProb = "PROB", de.facLoc = "NUM", de.facScale = "POS",
bcv.common = "POS", bcv.DF = "POS", dropout.present = "LOG",
dropout.mid = "NUM", dropout.shape = "NUM", path.from = "INT",
path.length = "INT", path.skew = "PROB",
path.nonlinearProb = "PROB", path.sigmaFac = "POS")
# Define which parameters are allowed to be vectors
vectors <- c("groupCells", "path.from", "path.length", "path.skew")
for (idx in seq_along(types)) {
name <- names(types)[idx]
type <- types[idx]
# Check vector properties first so we can exclude vectors with an NA
# before the next section
if (length(value) > 1) {
if (name %in% vectors) {
if (any(is.na(value))) {
stop(name, " is a vector and contains NA values")
} else if (length(value) != n.groups) {
stop("length of ", name, " must be 1 or the length of ",
"the groupCells parameter")
}
} else {
stop(name, " should be a single value")
}
}
# Missing values are allowed so we skip anything that is NA
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
if (!all(is.na(value))) {
if (type %in% c("NUM", "INT", "POS", "PROB") &&
!(is.numeric(value))) {
stop(name, " must be numeric")
}
if (type %in% c("INT", "POS", "PROB") && value < 0) {
stop(name, " must be positive")
}
if (type == "INT" && value %% 1 != 0) {
stop(name, " must be an integer")
}
if (type == "PROB" && (value < 0 || value > 1)) {
stop(paste(name, "must be in the range 0-1"))
}
if (type == "LOG" && !(is.logical(value))) {
stop(name, " must be logical (TRUE/FALSE)")
}
}
}
}
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#' Merge two splatParams objects
#'
#' Merge two splatParams objects. Any parameters that are NA in the first
#' splatParams object are replaced by the value in the second splatParams
#' object.
#'
#' @param params1 first splatParams object to merge.
#' @param params2 second splatParams object to merge.
#'
#' @return Merged splatParams object.
#' @examples
#' params <- splatParams(nGenes = 1000, nCells = 50)
#' params
#' # Replace unset parameters with default parameters
#' params <- mergeParams(params, defaultParams())
#' params
#' @export
mergeParams <- function(params1, params2) {
for (i in 1:length(params1)) {
for (j in 1:length(params1[[i]])) {
if (is.na(params1[[i]][[j]])) {
params1[[i]][[j]] <- params2[[i]][[j]]
}
}
}
return(params1)
}
#' Get default simulation parameters
#'
#' Get a splatParams object with a set of default parameters that will produce a
#' resonable simulation of single-cell RNA-seq count data.
#'
#' @return A splatParams object containing default parameters
#' @examples
#' params <- defaultParams()
#' params
#' @export
defaultParams <- function() {
params <- splatParams()
params <- setParams(params, nGenes = 10000, nCells = 100,
groupCells = 100, mean.rate = 0.3, mean.shape = 0.4,
lib.loc = 10, lib.scale = 0.5, out.prob = 0.1,
out.loProb = 0.5, out.facLoc = 4, out.facScale = 1,
de.prob = 0.1, de.downProb = 0.5, de.facLoc = 4,
de.facScale = 1, bcv.common = 0.1, bcv.DF = 25,
dropout.present = TRUE, dropout.mid = 0,
dropout.shape = -1, path.from = 0, path.length = 100,
path.skew = 0.5, path.nonlinearProb = 0.1,
path.sigmaFac = 0.8)