#' Estimate simple simulation parameters #' #' Estimate simulation parameters for the simple simulation from a real dataset. #' #' @param data either a counts matrix or an SCESet object containing count data #' to estimate parameters from. #' @param params SimpleParams object to store estimated values in. #' #' @details #' The \code{nGenes} and \code{nCells} parameters are taken from the size of the #' input data. The mean parameters are estimated by fitting a gamma distribution #' to the library size normalised mean expression level using #' \code{\link[fitdistrplus]{fitdist}}. See \code{\link{SimpleParams}} for more #' details on the parameters. #' #' @return SimpleParams object containing the estimated parameters. #' #' @examples #' data("sc_example_counts") #' params <- estimateSimpleParams(sc_example_counts) #' params #' @export estimateSimpleParams <- function(data, params = newSimpleParams()) { UseMethod("estimateSimpleParams") } #' @rdname estimateSimpleParams #' @export estimateSimpleParams.SCESet <- function(data, params = newSimpleParams()) { counts <- scater::counts(data) estimateSimpleParams(counts, params) } #' @rdname estimateSimpleParams #' @importFrom stats median #' @export estimateSimpleParams.matrix <- function(data, params = newSimpleParams()) { checkmate::assertClass(params, "SimpleParams") # Normalise for library size and remove all zero genes lib.sizes <- colSums(data) lib.med <- median(lib.sizes) norm.counts <- t(t(data) / lib.sizes * lib.med) norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ] means <- rowMeans(norm.counts) means.fit <- fitdistrplus::fitdist(means, "gamma", method = "mme") params <- setParams(params, nGenes = nrow(data), nCells = nrow(data), mean.shape = unname(means.fit$estimate["shape"]), mean.rate = unname(means.fit$estimate["rate"])) return(params) }