Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#' Estimate simple simulation parameters
#'
#' Derive the parameters of the simple simulation from a real dataset.
#'
#' @param counts a counts matrix, or an SCESet object holding count data, from
#' which the parameters are estimated.
#' @param params SimpleParams object that receives the estimated values.
#'
#' @details
#' \code{nGenes} and \code{nCells} are set from the dimensions of the input
#' data. The mean parameters come from a gamma distribution fitted with
#' \code{\link[fitdistrplus]{fitdist}} to the mean expression level of each
#' gene after library size normalisation. See \code{\link{SimpleParams}} for
#' more details on the parameters.
#'
#' @return SimpleParams object containing the estimated parameters.
#'
#' @examples
#' data("sc_example_counts")
#' params <- simpleEstimate(sc_example_counts)
#' params
#' @export
simpleEstimate <- function(counts, params = newSimpleParams()) {
    # S3 generic: the method is chosen from the class of `counts`
    UseMethod("simpleEstimate", counts)
}
#' @rdname simpleEstimate
#' @export
simpleEstimate.SCESet <- function(counts, params = newSimpleParams()) {
    # Pull the count data out of the SCESet with the scater accessor, then
    # hand it back to the generic so dispatch happens on the extracted object.
    simpleEstimate(scater::counts(counts), params)
}
#' @rdname simpleEstimate
#' @importFrom stats median
#' @export
simpleEstimate.matrix <- function(counts, params = newSimpleParams()) {
    # Estimate SimpleParams values from a counts matrix (rows are genes,
    # columns are cells). Returns `params` updated with nGenes, nCells and the
    # shape/rate of a gamma distribution fitted to the normalised gene means.

    checkmate::assertClass(params, "SimpleParams")

    # Normalise for library size by scaling every cell to the median library
    # size.
    # NOTE(review): a cell with zero total counts divides by zero here and
    # introduces NaN values - confirm that callers remove empty cells first.
    lib.sizes <- colSums(counts)
    lib.med <- median(lib.sizes)
    norm.counts <- t(t(counts) / lib.sizes * lib.med)

    # Keep only genes detected (count > 0) in more than one cell; this removes
    # all zero genes as well as genes seen in a single cell. drop = FALSE keeps
    # the result a matrix even when only one gene is left.
    expressed <- rowSums(norm.counts > 0) > 1
    norm.counts <- norm.counts[expressed, , drop = FALSE]

    # fitdist() needs more than one observation, so fail here with a clear
    # message rather than with an obscure error further down.
    if (nrow(norm.counts) < 2) {
        stop("Fewer than two genes are expressed in more than one cell, ",
             "unable to fit a gamma distribution to the gene means",
             call. = FALSE)
    }

    means <- rowMeans(norm.counts)

    # Method of moments fit of a gamma distribution to the gene means
    means.fit <- fitdistrplus::fitdist(means, "gamma", method = "mme")

    # nGenes/nCells describe the input as supplied, not the filtered matrix
    params <- setParams(params, nGenes = nrow(counts), nCells = ncol(counts),
                        mean.shape = unname(means.fit$estimate["shape"]),
                        mean.rate = unname(means.fit$estimate["rate"]))

    params
}