Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#' Create splatParams object
#'
#' S3 class for holding Splatter simulation parameters.
#'
#' @param ... parameters to set in the new params object, passed to
#' \code{\link{updateParams}}.
#'
#' @details
#' The splatParams object is a list based S3 object for holding simulation
#' parameters. It has the following sections and values:
#'
#' \itemize{
#' \item nGenes - Number of genes to simulate.
#' \item nCells - Number of cells to simulate.
#' \item [groupCells] - Vector giving the number of cells in each simulation
#' group/path.
#' \item mean (mean parameters)
#' \itemize{
#' \item rate - Rate parameter for the mean gamma simulation.
#' \item shape - Shape parameter for the mean gamma simulation.
#' }
#' \item lib (library size parameters)
#' \itemize{
#' \item loc - Location (meanlog) parameter for the library size
#' log-normal distribution.
#' \item scale - Scale (sdlog) parameter for the library size log-normal
#' distribution.
#' }
#' \item out (expression outlier parameters)
#' \itemize{
#' \item prob - Probability that a gene is an expression outlier.
#' \item loProb - Probability that an expression outlier gene is lowly
#' expressed.
#' \item facLoc - Location (meanlog) parameter for the expression outlier
#' factor log-normal distribution.
#' \item facScale - Scale (sdlog) parameter for the expression outlier
#' factor log-normal distribution.
#' }
#' \item de (differential expression parameters)
#' \itemize{
#' \item [prob] - Probability that a gene is differentially expressed
#' between groups or paths.
#' \item [downProb] - Probability that a differentially expressed gene is
#' down-regulated.
#' \item [facLoc] - Location (meanlog) parameter for the differential
#' expression factor log-normal distribution.
#' \item [facScale] - Scale (sdlog) parameter for the differential
#' expression factor log-normal distribution.
#' }
#' \item bcv (Biological Coefficient of Variation parameters)
#' \itemize{
#' \item common - Underlying common dispersion across all genes.
#' \item DF - Degrees of Freedom for the BCV inverse chi-squared
#' distribution.
#' }
#' \item dropout (dropout parameters)
#' \itemize{
#' \item present - Logical. Whether to simulate dropout.
#' \item mid - Midpoint parameter for the dropout logistic function.
#' \item shape - Shape parameter for the dropout logistic function.
#' }
#' \item path (differentiation path parameters)
#' \itemize{
#' \item [from] - Vector giving the originating point of each path. This
#' allows path structure such as a cell type which differentiates
#' into an intermediate cell type that then differentiates into two
#' mature cell types. A path structure of this form would have a
#' "from" parameter of c(0, 1, 1) (where 0 is the origin). If no
#' vector is given all paths will start at the origin.
#' \item [length] - Vector giving the number of steps to simulate along
#' each path. If a single value is given it will be applied to all
#' paths.
#' \item [skew] - Vector giving the skew of each path. Values closer to 1
#' will give more cells towards the starting population, values
#' closer to 0 will give more cells towards the final population.
#' If a single value is given it will be applied to all paths.
#' \item [nonlinearProb] - Probability that a gene follows a non-linear
#' path along the differentiation path. This allows more complex
#' gene patterns such as a gene being equally expressed at the
#' beginning and end of a path but lowly expressed in the middle.
#' \item [sigmaFac] - Sigma factor for non-linear gene paths. A higher
#' value will result in more extreme non-linear variations along a
#' path.
#' }
#' }
#'
#' Those shown in brackets cannot currently be easily estimated from a real
#' dataset by Splatter. This is also shown when a splatParams object is printed
#' with parameters labelled as either (estimatable) or [not estimatable].
#'
#' @return List based S3 splatParams object
#' @examples
#' params <- splatParams()
#' params
#' @export
splatParams <- function(...) {
    # Empty template: every parameter starts out unset (NA). Top-level entries
    # are global values, nested lists group the parameters of one section.
    template <- structure(
        list(
            nGenes = NA,
            nCells = NA,
            groupCells = NA,
            mean = list(rate = NA, shape = NA),
            lib = list(loc = NA, scale = NA),
            out = list(prob = NA, loProb = NA, facLoc = NA, facScale = NA),
            de = list(prob = NA, downProb = NA, facLoc = NA, facScale = NA),
            bcv = list(common = NA, DF = NA),
            dropout = list(present = NA, mid = NA, shape = NA),
            path = list(from = NA, length = NA, skew = NA,
                        nonlinearProb = NA, sigmaFac = NA)
        ),
        class = "splatParams"
    )

    # Overlay any values supplied by the caller on top of the empty template
    updateParams(template, ...)
}
#' Print splatParams object
#'
#' Pretty print the parameters stored in a splatParams object. Parameters are
#' labelled as either (estimatable) or [not estimatable].
#'
#' @param x splatParams object to print.
#' @param ... further arguments passed to or from other methods.
#'
#' @return The splatParams object \code{x}, returned invisibly (the usual
#' convention for print methods) so that the call can be used in a pipeline
#' or assignment.
#' @examples
#' params <- defaultParams()
#' print(params)
#' @export
print.splatParams <- function(x, ...) {

    # One named vector per section. Names carry the display label; round
    # brackets mark estimatable parameters, square brackets mark those that
    # are not. Vector-valued parameters get an index appended to their label
    # by c().
    pp <- list("Global:" = c("(Genes)" = x$nGenes, "(Cells)" = x$nCells,
                             "[Group Cells]" = x$groupCells),
               "Mean:" = c("(Rate)" = x$mean$rate, "(Shape)" = x$mean$shape),
               "Library size:" = c("(Location)" = x$lib$loc,
                                   "(Scale)" = x$lib$scale),
               "Expression outliers:" = c("(Probability)" = x$out$prob,
                                          "(Lo Probability)" = x$out$loProb,
                                          "(Location)" = x$out$facLoc,
                                          "(Scale)" = x$out$facScale),
               "Differential expression:" = c("[Probability]" = x$de$prob,
                                              "[Down Prob]" = x$de$downProb,
                                              "[Location]" = x$de$facLoc,
                                              "[Scale]" = x$de$facScale),
               "BCV:" = c("(Common Disp)" = x$bcv$common,
                          "(Degrees of Freedom)" = x$bcv$DF),
               "Dropout:" = c("(Present T/F)" = x$dropout$present,
                              "(Midpoint)" = x$dropout$mid,
                              "(Shape)" = x$dropout$shape),
               "Paths:" = c("[From]" = x$path$from, "[Length]" = x$path$length,
                            "[Skew]" = x$path$skew,
                            "[Non-linear Prob]" = x$path$nonlinearProb,
                            "[Sigma Factor]" = x$path$sigmaFac))

    # Print each section as a heading followed by its labelled values
    for (category in names(pp)) {
        cat(category, "\n")
        print.default(pp[[category]], print.gap = 2)
        cat("\n")
    }

    # Print methods should hand back their input without auto-printing it
    invisible(x)
}
#' Update a splatParams object
#'
#' Update any of the parameters in a splatParams object to have a new value.
#'
#' @param params the splatParams object to update.
#' @param ... Any parameters to update. Every argument must be named.
#'
#' @details
#' This function allows multiple parameters to be updated or set using a single
#' simple function call. Parameters to update are specified by supplying
#' additional arguments that follow the levels of the splatParams data structure
#' separated by the "." character. For example
#' \code{updateParams(params, nGenes = 100)} is equivalent to
#' \code{params$nGenes <- 100} and \code{updateParams(params, mean.rate = 1)} is
#' equivalent to \code{params$mean$rate <- 1}. Values are stored whole, so
#' vector parameters such as \code{path.from = c(0, 1, 1)} keep all of their
#' elements. For more details of the available parameters and the splatParams
#' data structure see \code{\link{splatParams}}.
#'
#' @return splatParams object with updated parameters
#' @examples
#' params <- defaultParams()
#' params
#' # Set nGenes and nCells
#' params <- updateParams(params, nGenes = 1000, nCells = 200)
#' params
#' # Set mean rate parameter and library size location parameter
#' params <- updateParams(params, mean.rate = 1, lib.loc = 12)
#' params
#' @export
updateParams <- function(params, ...) {

    update <- list(...)

    # Nothing to change
    if (length(update) == 0) {
        return(params)
    }

    # The argument names say which parameter to set, so each one needs a name
    arg.names <- names(update)
    if (is.null(arg.names) || !all(nzchar(arg.names))) {
        stop("all parameters to update must be named", call. = FALSE)
    }

    # "section.name" -> c("section", "name"); top-level names stay length one
    update.names <- strsplit(arg.names, ".", fixed = TRUE)

    for (idx in seq_along(update)) {
        update.name <- update.names[[idx]]
        value <- update[[idx]]

        # `[[<-` stores the value as a single element. Single-bracket
        # assignment would spread a vector over several slots and keep only
        # its first element.
        if (length(update.name) == 1) {
            params[[update.name]] <- value
        } else {
            params[[update.name[1]]][[update.name[2]]] <- value
        }
    }

    params
}
#' Merge two splatParams objects
#'
#' Merge two splatParams objects. Any parameters that are NA in the first
#' splatParams object are replaced by the value in the second splatParams
#' object.
#'
#' @param params1 first splatParams object to merge.
#' @param params2 second splatParams object to merge.
#'
#' @details
#' A parameter counts as unset only when it is a single \code{NA}. Parameters
#' are matched by position, so both objects are expected to share the layout
#' produced by \code{\link{splatParams}}. Replacement values are copied whole,
#' so vector parameters (for example \code{groupCells} or \code{path.from})
#' keep all of their elements.
#'
#' @return Merged splatParams object.
#' @examples
#' params <- splatParams(nGenes = 1000, nCells = 50)
#' params
#' # Replace unset parameters with default parameters
#' params <- mergeParams(params, defaultParams())
#' params
#' @export
mergeParams <- function(params1, params2) {

    # Unset means exactly one NA. Checking the length first keeps the test
    # scalar, so parameters holding a vector never reach if() as a
    # multi-element condition.
    isUnset <- function(value) {
        is.atomic(value) && length(value) == 1 && is.na(value)
    }

    for (i in seq_along(params1)) {
        if (is.list(params1[[i]])) {
            # Section (e.g. mean, path): merge each parameter inside it
            for (j in seq_along(params1[[i]])) {
                if (isUnset(params1[[i]][[j]])) {
                    params1[[i]][[j]] <- params2[[i]][[j]]
                }
            }
        } else if (isUnset(params1[[i]])) {
            # Top-level parameter: copy the whole value, not just element one
            params1[[i]] <- params2[[i]]
        }
    }

    params1
}
#' Get default simulation parameters
#'
#' Build a splatParams object in which every parameter has been given a default
#' value, chosen to produce a reasonable simulation of single-cell RNA-seq
#' count data.
#'
#' @return A splatParams object containing default parameters
#' @examples
#' params <- defaultParams()
#' params
#' @export
defaultParams <- function() {
    # splatParams() forwards its arguments to updateParams(), so the defaults
    # can be handed over directly using the "section.name" convention.
    splatParams(
        # Global
        nGenes = 10000, nCells = 100, groupCells = 100,
        # Mean
        mean.rate = 0.3, mean.shape = 0.4,
        # Library size
        lib.loc = 10, lib.scale = 0.5,
        # Expression outliers
        out.prob = 0.1, out.loProb = 0.5, out.facLoc = 4, out.facScale = 1,
        # Differential expression
        de.prob = 0.1, de.downProb = 0.5, de.facLoc = 4, de.facScale = 1,
        # BCV
        bcv.common = 0.1, bcv.DF = 25,
        # Dropout
        dropout.present = TRUE, dropout.mid = 0, dropout.shape = -1,
        # Paths
        path.from = 0, path.length = 100, path.skew = 0.5,
        path.nonlinearProb = 0.1, path.sigmaFac = 0.8
    )
}