Newer
Older
#' Estimate simple simulation parameters
#'
#' Estimate simulation parameters for the simple simulation from a real dataset.
#'
#' @param data either a counts matrix or an SCESet object containing count data
#' to estimate parameters from.
#' @param params SimpleParams object to store estimated values in.
#'
#' @details
#' The \code{nGenes} and \code{nCells} parameters are taken from the size of the
#' input data. The mean parameters are estimated by fitting a gamma distribution
#' to the library size normalised mean expression level using
#' \code{\link[fitdistrplus]{fitdist}}. See \code{\link{SimpleParams}} for more
#' details on the parameters.
#'
#' @return SimpleParams object containing the estimated parameters.
#'
#' @examples
#' data("sc_example_counts")
#' params <- estimateSimpleParams(sc_example_counts)
#' params
#' @export
estimateSimpleParams <- function(data, params = newSimpleParams()) {
UseMethod("estimateSimpleParams")
}
#' @export
estimateSimpleParams.SCESet <- function(data, params = newSimpleParams()) {
estimateSimpleParams(counts, params)
}
#' @importFrom stats median
#' @export
estimateSimpleParams.matrix <- function(data, params = newSimpleParams()) {
checkmate::assertClass(params, "SimpleParams")
# Normalise for library size and remove all zero genes
lib.sizes <- colSums(data)
lib.med <- median(lib.sizes)
norm.counts <- t(t(data) / lib.sizes * lib.med)
norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ]
means <- rowMeans(norm.counts)
means.fit <- fitdistrplus::fitdist(means, "gamma", method = "mme")
params <- setParams(params, nGenes = nrow(data), nCells = nrow(data),
mean.shape = unname(means.fit$estimate["shape"]),
mean.rate = unname(means.fit$estimate["rate"]))
return(params)
}