Skip to content
Snippets Groups Projects
params.R 16.07 KiB
#' Create splatParams object
#'
#' S3 class for holding Splatter simulation parameters.
#'
#' @param ... parameters to set in the new params object, passed to
#'            \code{\link{setParams}}.
#'
#' @details
#' The splatParams object is a list based S3 object for holding simulation
#' parameters. It has the following sections and values:
#'
#' \itemize{
#'   \item nGenes - Number of genes to simulate.
#'   \item nCells - Number of cells to simulate.
#'   \item [groupCells] - Vector giving the number of cells in each simulation
#'         group/path.
#'   \item mean (mean parameters)
#'     \itemize{
#'       \item rate - Rate parameter for the mean gamma simulation.
#'       \item shape - Shape parameter for the mean gamma simulation.
#'     }
#'   \item lib (library size parameters)
#'     \itemize{
#'       \item loc - Location (meanlog) parameter for the library size
#'             log-normal distribution.
#'       \item scale - Scale (sdlog) parameter for the library size log-normal
#'             distribution.
#'     }
#'   \item out (expression outlier parameters)
#'     \itemize{
#'       \item prob - Probability that a gene is an expression outlier.
#'       \item loProb - Probability that an expression outlier gene is lowly
#'             expressed.
#'       \item facLoc - Location (meanlog) parameter for the expression outlier
#'             factor log-normal distribution.
#'       \item facScale - Scale (sdlog) parameter for the expression outlier
#'             factor log-normal distribution.
#'     }
#'   \item de (differential expression parameters)
#'     \itemize{
#'       \item [prob] - Probability that a gene is differentially expressed
#'             between groups or paths.
#'       \item [downProb] - Probability that differentially expressed gene is
#'             down-regulated.
#'       \item [facLoc] - Location (meanlog) parameter for the differential
#'             expression factor log-normal distribution.
#'       \item [facScale] - Scale (sdlog) parameter for the differential
#'             expression factor log-normal distribution.
#'     }
#'   \item bcv (Biological Coefficient of Variation parameters)
#'     \itemize{
#'       \item common - Underlying common dispersion across all genes.
#'       \item DF - Degrees of Freedom for the BCV inverse chi-squared
#'             distribution.
#'     }
#'   \item dropout (dropout parameters)
#'     \itemize{
#'       \item present - Logical. Whether to simulate dropout.
#'       \item mid - Midpoint parameter for the dropout logistic function.
#'       \item shape - Shape parameter for the dropout logistic function.
#'     }
#'   \item path (differentiation path parameters)
#'     \itemize{
#'       \item [from] - Vector giving the originating point of each path. This
#'             allows path structure such as a cell type which differentiates
#'             into an intermediate cell type that then differentiates into two
#'             mature cell types. A path structure of this form would have a
#'             "from" parameter of c(0, 1, 1) (where 0 is the origin). If no
#'             vector is given all paths will start at the origin.
#'       \item [length] - Vector giving the number of steps to simulate along
#'             each path. If a single value is given it will be applied to all
#'             paths.
#'       \item [skew] - Vector giving the skew of each path. Values closer to 1
#'             will give more cells towards the starting population, values
#'             closer to 0 will give more cells towards the final population.
#'             If a single value is given it will be applied to all paths.
#'       \item [nonlinearProb] - Probability that a gene follows a non-linear
#'             path along the differentiation path. This allows more complex
#'             gene patterns such as a gene being equally expressed at the
#'             beginning an end of a path but lowly expressed in the middle.
#'       \item [sigmaFac] - Sigma factor for non-linear gene paths. A higher
#'             value will result in more extreme non-linear variations along a
#'             path.
#'   }
#' }
#'
#' Those shown in brackets cannot currently be easily estimated from a real
#' dataset by Splatter. This is also shown when a splatParams object is printed
#' with parameters labelled as either (estimatable) or [not estimatable].
#'
#' @return List based S3 splatParams object
#' @examples
#' params <- splatParams()
#' params
#' @export
splatParams <- function(...) {
    params <- list(nGenes = NA, nCells = NA, groupCells = NA,
                   mean = list(rate = NA, shape = NA),
                   lib = list(loc = NA, scale = NA),
                   out = list(prob = NA, loProb = NA, facLoc = NA,
                              facScale = NA),
                   de = list(prob = NA, downProb = NA, facLoc = NA,
                             facScale = NA),
                   bcv = list(common = NA, DF = NA),
                   dropout = list(present = NA, mid = NA, shape = NA),
                   path = list(from = NA, length = NA, skew = NA,
                               nonlinearProb = NA, sigmaFac = NA))

    class(params) <- "splatParams"

    params <- setParams(params, ...)

    return(params)
}

#' Print splatParams object
#'
#' Pretty print the parameters stored in a splatParams object. Parameters are
#' labelled as either (estimatable) or [not estimatable].
#'
#' @param x splatParams object to print.
#' @param ... further arguments passed to or from other methods.
#'
#' @return Invisibly returns x (via \code{\link{invisible}(x)})
#'
#' @examples
#' params <- defaultParams()
#' print(params)
#' @export
print.splatParams <- function(x, ...) {

    pp <- list("Global:"             = c("(Genes)"        = "nGenes",
                                         "(Cells)"        = "nCells",
                                         "[Group Cells]"  = "groupCells"),
               "Mean:"               = c("(Rate)"         = "mean.rate",
                                         "(Shape)"        = "mean.shape"),
               "Library size:"       = c("(Location)"     = "lib.loc",
                                         "(Scale)"        = "lib.scale"),
               "Exprs outliers:"     = c("(Probability)"  = "out.prob",
                                         "(Lo Prob)"      = "out.loProb",
                                         "(Location)"     = "out.facLoc",
                                         "(Scale)"        = "out.facScale"),
               "Differential exprs:" = c("[Probability]"  = "de.prob",
                                         "[Down Prob]"    = "de.downProb",
                                         "[Location]"     = "de.facLoc",
                                         "[Scale]"        = "de.facScale"),
               "BCV:"                = c("(Common Disp)"  = "bcv.common",
                                         "(DoF)"          = "bcv.DF"),
               "Dropout:"            = c("(Present T/F)"  = "dropout.present",
                                         "(Midpoint)"     = "dropout.mid",
                                         "(Shape)"        = "dropout.shape"),
               "Paths:"              = c("[From]"         = "path.from",
                                         "[Length]"       = "path.length",
                                         "[Skew]"         = "path.skew",
                                         "[Non-linear]"   = "path.nonlinearProb",
                                         "[Sigma Factor]" = "path.sigmaFac"))

    for (category in names(pp)) {
        parameters <- getParams(x, pp[[category]])
        parameters <- sapply(parameters, paste, collapse = ", ")
        names(parameters) <- names(pp[[category]])
        cat(category, "\n")
        print(noquote(parameters), print.gap = 2)
        cat("\n")
    }

    invisible(x)
}

#' Update a splatParams object
#'
#' Set any of the parameters in a splatParams object to have a new value.
#'
#' @param params the splatParams object to update.
#' @param ... Any parameters to set.
#'
#' @details
#' This function allows multiple parameters to be updated or set using a single
#' simple function call. Parameters to update are specified by supplying
#' additional arguments that follow the levels of the splatParams data structure
#' separated by the "." character. For example
#' \code{setParams(params, nGenes = 100)} is equivalent to
#' \code{params$nGenes <- 100} and \code{update(params, mean.rate = 1)} is
#' equivalent to \code{params$mean$rate <- 1}. For more details of the available
#' parameters and the splatParams data structure see \code{\link{splatParams}}.
#'
#' @return splatParms object with updated parameters
#' @examples
#' params <- defaultParams()
#' params
#' # Set nGenes and nCells
#' params <- setParams(params, nGenes = 1000, nCells = 200)
#' params
#' # Set mean rate paramater and library size location parameter
#' params <- setParams(params, mean.rate = 1, lib.loc = 12)
#' params
#' @export
setParams <- function(params, ...) {

    update <- list(...)

    if (length(update) == 0) {
        return(params)
    }

    names <- strsplit(names(update), ".", fixed = TRUE)

    for (idx in seq_along(names)) {
        name <- names[[idx]]
        value <- update[[idx]]
        if (length(name) == 1) {
            params[[name]] <- value
        } else {
            params[[name[1]]][[name[2]]] <- value
        }
    }

    checkParams(params)

    return(params)
}

#' Get parameters from splatParams object
#'
#' Get values for the parameters in a splatParams object. Uses the same pattern
#' (category.parameter) as \code{\link{setParams}}.
#'
#' @param params splatParams object to get parameters from.
#' @param names vector of parameter names to extract.
#'
#' @return Vector if all selected parameters are single values, otherwise a
#'         list.
#' @examples
#' params <- defaultParams()
#' # Get the number of genes
#' getParams(params, "nGenes")
#' # Get the number of genes and the mean rate parameter
#' getParams(params, c("nGenes", "mean.rate"))
#' # Returns a list if one of the selected parameters is a vector
#' params <- setParams(params, groupCells = c(100, 200))
#' getParams(params, c("nGenes", "mean.rate", "groupCells"))
#' @export
getParams <- function(params, names) {

    if (length(names) == 0) {
        return(NULL)
    }

    output <- list()
    keep.list <- FALSE
    for (idx in seq_along(names)) {
        name <- names[[idx]]
        name.split <- strsplit(name, ".", fixed = TRUE)[[1]]
        if (length(name) == 1) {
            value <- params[[name.split]]
        } else {
            value <- params[[name.split[1]]][[name.split[2]]]
        }
        output[[name]] <- value
        if (length(value) > 1) {
            keep.list <- TRUE
        }
    }

    if (!keep.list) {
        output <- unlist(output)
    } else if (length(output) == 1) {
        output <- output[[1]]
    }

    return(output)
}

#' Check splatParams object
#'
#' Check that a splatParams object has valid parameter values.
#'
#' @param params splatParams object to check
#'
#' @details
#' The following checks are made:
#' \itemize{
#'   \item{Input has "splatParams" class}
#'   \item{Logical parameters are logical}
#'   \item{Numeric parameters are numeric}
#'   \item{Positive numeric parameters are positive}
#'   \item{Integer parameters are integers}
#'   \item{Probability parameters are in the range 0-1}
#'   \item{Vector parameters are the correct length}
#'   \item{Vector parameters do not contain NAs}
#'   \item{Non-vector parameters are single values}
#' }
#'
#' @return Produces error if not valid otherwise nothing
#' @examples
#' checkParams(defaultParams())
#' @export
checkParams <- function(params) {

    # Check class before anything else
    if (!("splatParams" %in% class(params))) {
        stop("params does not belong to the splatParams class")
    }

    # Define what values each parameter can take
    # NUM = Numeric
    # POS = Positive numeric
    # INT = Positive integer
    # PROB = Positive numeric in range 0-1
    # LOG = Logical
    types <- c(nGenes = "INT", nCells = "INT", groupCells = "INT",
               mean.rate = "POS", mean.shape = "POS", lib.loc = "NUM",
               lib.scale = "POS", out.prob = "PROB", out.loProb = "PROB",
               out.facLoc = "NUM", out.facScale = "POS", de.prob = "PROB",
               de.downProb = "PROB", de.facLoc = "NUM", de.facScale = "POS",
               bcv.common = "POS", bcv.DF = "POS", dropout.present = "LOG",
               dropout.mid = "NUM", dropout.shape = "NUM", path.from = "INT",
               path.length = "INT", path.skew = "PROB",
               path.nonlinearProb = "PROB", path.sigmaFac = "POS")

    # Define which parameters are allowed to be vectors
    vectors <- c("groupCells", "path.from", "path.length", "path.skew")
    n.groups <- length(getParams(params, "groupCells"))

    for (idx in seq_along(types)) {
        name <- names(types)[idx]
        type <- types[idx]
        value <- getParams(params, name)

        # Check vector properties first so we can exclude vectors with an NA
        # before the next section
        if (length(value) > 1) {
            if (name %in% vectors) {
                if (any(is.na(value))) {
                    stop(name, " is a vector and contains NA values")
                } else if (length(value) != n.groups) {
                    stop("length of ", name, " must be 1 or the length of ",
                         "the groupCells parameter")
                }
            } else {
                stop(name, " should be a single value")
            }
        }

        # Missing values are allowed so we skip anything that is NA
        if (!all(is.na(value))) {

            if (type %in% c("NUM", "INT", "POS", "PROB") &&
                !(is.numeric(value))) {
                stop(name, " must be numeric")
            }

            if (type %in% c("INT", "POS", "PROB") && value < 0) {
                stop(name, " must be positive")
            }

            if (type == "INT" && value %% 1 != 0) {
                stop(name, " must be an integer")
            }

            if (type == "PROB" && (value < 0 || value > 1)) {
                stop(paste(name, "must be in the range 0-1"))
            }

            if (type == "LOG" && !(is.logical(value))) {
                stop(name, " must be logical (TRUE/FALSE)")
            }
        }
    }
}

#' Merge two splatParams objects
#'
#' Merge two splatParams objects. Any parameters that are NA in the first
#' splatParams object are replaced by the value in the second splatParams
#' object.
#'
#' @param params1 first splatParams object to merge.
#' @param params2 second splatParams object to merge.
#'
#' @return Merged splatParams object.
#' @examples
#' params <- splatParams(nGenes = 1000, nCells = 50)
#' params
#' # Replace unset parameters with default parameters
#' params <- mergeParams(params, defaultParams())
#' params
#' @export
mergeParams <- function(params1, params2) {

    for (i in 1:length(params1)) {
        for (j in 1:length(params1[[i]])) {
            if (is.na(params1[[i]][[j]])) {
                params1[[i]][[j]] <- params2[[i]][[j]]
            }
        }
    }

    checkParams(params1)

    return(params1)
}

#' Get default simulation parameters
#'
#' Get a splatParams object with a set of default parameters that will produce a
#' resonable simulation of single-cell RNA-seq count data.
#'
#' @return A splatParams object containing default parameters
#' @examples
#' params <- defaultParams()
#' params
#' @export
defaultParams <- function() {

    params <- splatParams()

    params <- setParams(params, nGenes = 10000, nCells = 100,
                        groupCells = 100, mean.rate = 0.3, mean.shape = 0.4,
                        lib.loc = 10, lib.scale = 0.5, out.prob = 0.1,
                        out.loProb = 0.5, out.facLoc = 4, out.facScale = 1,
                        de.prob = 0.1, de.downProb = 0.5, de.facLoc = 4,
                        de.facScale = 1, bcv.common = 0.1, bcv.DF = 25,
                        dropout.present = TRUE, dropout.mid = 0,
                        dropout.shape = -1, path.from = 0, path.length = 100,
                        path.skew = 0.5, path.nonlinearProb = 0.1,
                        path.sigmaFac = 0.8)

    return(params)
}