From 504b1d0b97143f55a7b55cf55fc04b171d46c1d4 Mon Sep 17 00:00:00 2001
From: Luke Zappia <lazappi@users.noreply.github.com>
Date: Wed, 5 Oct 2016 12:26:20 +1100
Subject: [PATCH] Add params functions

---
 NAMESPACE                |   5 +
 R/params.R               | 267 +++++++++++++++++++++++++++++++++++++++
 man/defaultParams.Rd     |  20 +++
 man/mergeParams.Rd       |  29 +++++
 man/print.splatParams.Rd |  22 ++++
 man/splatParams.Rd       | 106 ++++++++++++++++
 man/splatter.Rd          |  16 +++
 man/updateParams.Rd      |  40 ++++++
 8 files changed, 505 insertions(+)
 create mode 100644 R/params.R
 create mode 100644 man/defaultParams.Rd
 create mode 100644 man/mergeParams.Rd
 create mode 100644 man/print.splatParams.Rd
 create mode 100644 man/splatParams.Rd
 create mode 100644 man/splatter.Rd
 create mode 100644 man/updateParams.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 6ae9268..7824ce6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,2 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(print,splatParams)
+export(defaultParams)
+export(mergeParams)
+export(splatParams)
+export(updateParams)
diff --git a/R/params.R b/R/params.R
new file mode 100644
index 0000000..9e8b80b
--- /dev/null
+++ b/R/params.R
@@ -0,0 +1,267 @@
+#' Create splatParams object
+#'
+#' S3 class for holding Splatter simulation parameters.
+#'
+#' @param ... parameters to set in the new params object, passed to
+#'            \code{\link{updateParams}}.
+#'
+#' @details
+#' The splatParams object is a list based S3 object for holding simulation
+#' parameters. It has the following sections and values:
+#'
+#' \itemize{
+#'   \item nGenes - Number of genes to simulate.
+#'   \item nCells - Number of cells to simulate.
+#'   \item [groupCells] - Vector giving the number of cells in each simulation
+#'         group/path.
+#'   \item mean (mean parameters)
+#'     \itemize{
+#'       \item rate - Rate parameter for the mean gamma simulation.
+#'       \item shape - Shape parameter for the mean gamma simulation.
+#'     }
+#'   \item lib (library size parameters)
+#'     \itemize{
+#'       \item loc - Location (meanlog) parameter for the library size
+#'             log-normal distribution.
+#'       \item scale - Scale (sdlog) parameter for the library size log-normal
+#'             distribution.
+#'     }
+#'   \item out (expression outlier parameters)
+#'     \itemize{
+#'       \item prob - Probability that a gene is an expression outlier.
+#'       \item loProb - Probability that an expression outlier gene is lowly
+#'             expressed.
+#'       \item facLoc - Location (meanlog) parameter for the expression outlier
+#'             factor log-normal distribution.
+#'       \item facScale - Scale (sdlog) parameter for the expression outlier
+#'             factor log-normal distribution.
+#'     }
+#'   \item de (differential expression parameters)
+#'     \itemize{
+#'       \item [prob] - Probability that a gene is differentially expressed
+#'             between groups or paths.
+#'       \item [downProb] - Probability that a differentially expressed gene is
+#'             down-regulated.
+#'       \item [facLoc] - Location (meanlog) parameter for the differential
+#'             expression factor log-normal distribution.
+#'       \item [facScale] - Scale (sdlog) parameter for the differential
+#'             expression factor log-normal distribution.
+#'     }
+#'   \item bcv (Biological Coefficient of Variation parameters)
+#'     \itemize{
+#'       \item common - Underlying common dispersion across all genes.
+#'       \item DF - Degrees of Freedom for the BCV inverse chi-squared
+#'             distribution.
+#'     }
+#'   \item dropout (dropout parameters)
+#'     \itemize{
+#'       \item present - Logical. Whether to simulate dropout.
+#'       \item mid - Midpoint parameter for the dropout logistic function.
+#'       \item shape - Shape parameter for the dropout logistic function.
+#'     }
+#'   \item path (differentiation path parameters)
+#'     \itemize{
+#'       \item [from] - Vector giving the originating point of each path. This
+#'             allows path structure such as a cell type which differentiates
+#'             into an intermediate cell type that then differentiates into two
+#'             mature cell types. A path structure of this form would have a
+#'             "from" parameter of c(0, 1, 1) (where 0 is the origin). If no
+#'             vector is given all paths will start at the origin.
+#'       \item [length] - Vector giving the number of steps to simulate along
+#'             each path. If a single value is given it will be applied to all
+#'             paths.
+#'       \item [skew] - Vector giving the skew of each path. Values closer to 1
+#'             will give more cells towards the starting population, values
+#'             closer to 0 will give more cells towards the final population.
+#'             If a single value is given it will be applied to all paths.
+#'       \item [nonlinearProb] - Probability that a gene follows a non-linear
+#'             path along the differentiation path. This allows more complex
+#'             gene patterns such as a gene being equally expressed at the
+#'             beginning and end of a path but lowly expressed in the middle.
+#'       \item [sigmaFac] - Sigma factor for non-linear gene paths. A higher
+#'             value will result in more extreme non-linear variations along a
+#'             path.
+#'   }
+#' }
+#'
+#' Those shown in brackets cannot currently be easily estimated from a real
+#' dataset by Splatter. This is also shown when a splatParams object is printed
+#' with parameters labelled as either (estimatable) or [not estimatable].
+#'
+#' @return List based S3 splatParams object
+#' @examples
+#' params <- splatParams()
+#' params
+#' @export
+splatParams <- function(...) {
+    # Skeleton with every parameter unset (NA), tagged with the S3 class
+    empty <- structure(
+        list(nGenes = NA, nCells = NA, groupCells = NA,
+             mean = list(rate = NA, shape = NA),
+             lib = list(loc = NA, scale = NA),
+             out = list(prob = NA, loProb = NA, facLoc = NA,
+                        facScale = NA),
+             de = list(prob = NA, downProb = NA, facLoc = NA,
+                       facScale = NA),
+             bcv = list(common = NA, DF = NA),
+             dropout = list(present = NA, mid = NA, shape = NA),
+             path = list(from = NA, length = NA, skew = NA,
+                         nonlinearProb = NA, sigmaFac = NA)),
+        class = "splatParams")
+
+    # Apply any user supplied values on top of the empty skeleton
+    return(updateParams(empty, ...))
+}
+
+#' Print splatParams object
+#'
+#' Pretty print the parameters stored in a splatParams object. Parameters are
+#' labelled as either (estimatable) or [not estimatable].
+#'
+#' @param x splatParams object to print.
+#' @param ... further arguments passed to or from other methods.
+#'
+#' @examples
+#' params <- defaultParams()
+#' print(params)
+#' @export
+print.splatParams <- function(x, ...) {
+    pp <- list("Global:" = c("(Genes)" = x$nGenes, "(Cells)" = x$nCells,
+                             "[Group Cells]" = x$groupCells),
+               "Mean:" = c("(Rate)" = x$mean$rate, "(Shape)" = x$mean$shape),
+               "Library size:" = c("(Location)" = x$lib$loc,
+                                   "(Scale)" = x$lib$scale),
+               "Expression outliers:" = c("(Probability)" = x$out$prob,
+                                          "(Lo Probability)" = x$out$loProb,
+                                          "(Location)" = x$out$facLoc,
+                                          "(Scale)" = x$out$facScale),
+               "Differential expression:" = c("[Probability]" = x$de$prob,
+                                              "[Down Prob]" = x$de$downProb,
+                                              "[Location]" = x$de$facLoc,
+                                              "[Scale]" = x$de$facScale),
+               "BCV:" = c("(Common Disp)" = x$bcv$common,
+                          "(Degrees of Freedom)" = x$bcv$DF),
+               "Dropout:" = c("(Present T/F)" = x$dropout$present,
+                              "(Midpoint)" = x$dropout$mid,
+                              "(Shape)" = x$dropout$shape),
+               "Paths:" = c("[From]" = x$path$from, "[Length]" = x$path$length,
+                            "[Skew]" = x$path$skew,
+                            "[Non-linear Prob]" = x$path$nonlinearProb,
+                            "[Sigma Factor]" = x$path$sigmaFac))
+    # Each section is printed as a named vector under its heading
+    for (category in names(pp)) {
+        cat(category, "\n")
+        print.default(pp[[category]], print.gap = 2)
+        cat("\n")
+    }
+    invisible(x)  # print methods return their argument invisibly
+}
+
+#' Update a splatParams object
+#'
+#' Update any of the parameters in a splatParams object to have a new value.
+#'
+#' @param params the splatParams object to update.
+#' @param ... Any parameters to update.
+#'
+#' @details
+#' This function allows multiple parameters to be updated or set using a single
+#' simple function call. Parameters to update are specified by supplying
+#' additional arguments that follow the levels of the splatParams data structure
+#' separated by the "." character. For example
+#' \code{updateParams(params, nGenes = 100)} is equivalent to
+#' \code{params$nGenes <- 100} and \code{updateParams(params, mean.rate = 1)} is
+#' equivalent to \code{params$mean$rate <- 1}. For more details of the available
+#' parameters and the splatParams data structure see \code{\link{splatParams}}.
+#'
+#' @return splatParams object with updated parameters
+#' @examples
+#' params <- defaultParams()
+#' params
+#' # Set nGenes and nCells
+#' params <- updateParams(params, nGenes = 1000, nCells = 200)
+#' params
+#' # Set mean rate parameter and library size location parameter
+#' params <- updateParams(params, mean.rate = 1, lib.loc = 12)
+#' params
+#' @export
+updateParams <- function(params, ...) {
+    # Named values to set; "a.b" addresses params$a$b, "a" addresses params$a
+    update <- list(...)
+
+    if (length(update) == 0) {
+        return(params)
+    }
+
+    update.names <- strsplit(names(update), ".", fixed = TRUE)
+    # [[<- stores vector values whole; [<- kept only their first element
+    for (idx in seq_along(update)) {
+        update.name <- update.names[[idx]]
+        value <- update[[idx]]
+        if (length(update.name) == 1) {
+            params[[update.name]] <- value
+        } else {
+            params[[update.name[1]]][[update.name[2]]] <- value
+        }
+    }
+
+    return(params)
+}
+
+#' Merge two splatParams objects
+#'
+#' Merge two splatParams objects. Any parameters that are NA in the first
+#' splatParams object are replaced by the value in the second splatParams
+#' object.
+#'
+#' @param params1 first splatParams object to merge.
+#' @param params2 second splatParams object to merge.
+#'
+#' @return Merged splatParams object.
+#' @examples
+#' params <- splatParams(nGenes = 1000, nCells = 50)
+#' params
+#' # Replace unset parameters with default parameters
+#' params <- mergeParams(params, defaultParams())
+#' params
+#' @export
+mergeParams <- function(params1, params2) {
+    # Unset means entirely NA, so vector-valued parameters are tested whole
+    fill <- function(x, y) if (all(is.na(x))) y else x
+    for (i in seq_along(params1)) {
+        if (is.list(params1[[i]])) {
+            params1[[i]] <- Map(fill, params1[[i]], params2[[i]])
+        } else {
+            params1[[i]] <- fill(params1[[i]], params2[[i]])
+        }
+    }
+    return(params1)
+}
+
+#' Get default simulation parameters
+#'
+#' Get a splatParams object with a set of default parameters that will produce a
+#' reasonable simulation of single-cell RNA-seq count data.
+#'
+#' @return A splatParams object containing default parameters
+#' @examples
+#' params <- defaultParams()
+#' params
+#' @export
+defaultParams <- function() {
+    # splatParams() forwards its arguments to updateParams(), so the defaults
+    # can be supplied directly when the object is constructed.
+    defaults <- splatParams(nGenes = 10000, nCells = 100, groupCells = 100,
+                            mean.rate = 0.3, mean.shape = 0.4,
+                            lib.loc = 10, lib.scale = 0.5,
+                            out.prob = 0.1, out.loProb = 0.5,
+                            out.facLoc = 4, out.facScale = 1,
+                            de.prob = 0.1, de.downProb = 0.5,
+                            de.facLoc = 4, de.facScale = 1,
+                            bcv.common = 0.1, bcv.DF = 25,
+                            dropout.present = TRUE, dropout.mid = 0,
+                            dropout.shape = -1, path.from = 0,
+                            path.length = 100, path.skew = 0.5,
+                            path.nonlinearProb = 0.1, path.sigmaFac = 0.8)
+    return(defaults)
+}
\ No newline at end of file
diff --git a/man/defaultParams.Rd b/man/defaultParams.Rd
new file mode 100644
index 0000000..18c9e43
--- /dev/null
+++ b/man/defaultParams.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/params.R
+\name{defaultParams}
+\alias{defaultParams}
+\title{Get default simulation parameters}
+\usage{
+defaultParams()
+}
+\value{
+A splatParams object containing default parameters
+}
+\description{
+Get a splatParams object with a set of default parameters that will produce a
+reasonable simulation of single-cell RNA-seq count data.
+}
+\examples{
+params <- defaultParams()
+params
+}
+
diff --git a/man/mergeParams.Rd b/man/mergeParams.Rd
new file mode 100644
index 0000000..b8b5f53
--- /dev/null
+++ b/man/mergeParams.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/params.R
+\name{mergeParams}
+\alias{mergeParams}
+\title{Merge two splatParams objects}
+\usage{
+mergeParams(params1, params2)
+}
+\arguments{
+\item{params1}{first splatParams object to merge.}
+
+\item{params2}{second splatParams object to merge.}
+}
+\value{
+Merged splatParams object.
+}
+\description{
+Merge two splatParams objects. Any parameters that are NA in the first
+splatParams object are replaced by the value in the second splatParams
+object.
+}
+\examples{
+params <- splatParams(nGenes = 1000, nCells = 50)
+params
+# Replace unset parameters with default parameters
+params <- mergeParams(params, defaultParams())
+params
+}
+
diff --git a/man/print.splatParams.Rd b/man/print.splatParams.Rd
new file mode 100644
index 0000000..fca0f4a
--- /dev/null
+++ b/man/print.splatParams.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/params.R
+\name{print.splatParams}
+\alias{print.splatParams}
+\title{Print splatParams object}
+\usage{
+\method{print}{splatParams}(x, ...)
+}
+\arguments{
+\item{x}{splatParams object to print.}
+
+\item{...}{further arguments passed to or from other methods.}
+}
+\description{
+Pretty print the parameters stored in a splatParams object. Parameters are
+labelled as either (estimatable) or [not estimatable].
+}
+\examples{
+params <- defaultParams()
+print(params)
+}
+
diff --git a/man/splatParams.Rd b/man/splatParams.Rd
new file mode 100644
index 0000000..e1bf1b4
--- /dev/null
+++ b/man/splatParams.Rd
@@ -0,0 +1,106 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/params.R
+\name{splatParams}
+\alias{splatParams}
+\title{Create splatParams object}
+\usage{
+splatParams(...)
+}
+\arguments{
+\item{...}{parameters to set in the new params object, passed to
+\code{\link{updateParams}}.}
+}
+\value{
+List based S3 splatParams object
+}
+\description{
+S3 class for holding Splatter simulation parameters.
+}
+\details{
+The splatParams object is a list based S3 object for holding simulation
+parameters. It has the following sections and values:
+
+\itemize{
+  \item nGenes - Number of genes to simulate.
+  \item nCells - Number of cells to simulate.
+  \item [groupCells] - Vector giving the number of cells in each simulation
+        group/path.
+  \item mean (mean parameters)
+    \itemize{
+      \item rate - Rate parameter for the mean gamma simulation.
+      \item shape - Shape parameter for the mean gamma simulation.
+    }
+  \item lib (library size parameters)
+    \itemize{
+      \item loc - Location (meanlog) parameter for the library size
+            log-normal distribution.
+      \item scale - Scale (sdlog) parameter for the library size log-normal
+            distribution.
+    }
+  \item out (expression outlier parameters)
+    \itemize{
+      \item prob - Probability that a gene is an expression outlier.
+      \item loProb - Probability that an expression outlier gene is lowly
+            expressed.
+      \item facLoc - Location (meanlog) parameter for the expression outlier
+            factor log-normal distribution.
+      \item facScale - Scale (sdlog) parameter for the expression outlier
+            factor log-normal distribution.
+    }
+  \item de (differential expression parameters)
+    \itemize{
+      \item [prob] - Probability that a gene is differentially expressed
+            between groups or paths.
+      \item [downProb] - Probability that a differentially expressed gene is
+            down-regulated.
+      \item [facLoc] - Location (meanlog) parameter for the differential
+            expression factor log-normal distribution.
+      \item [facScale] - Scale (sdlog) parameter for the differential
+            expression factor log-normal distribution.
+    }
+  \item bcv (Biological Coefficient of Variation parameters)
+    \itemize{
+      \item common - Underlying common dispersion across all genes.
+      \item DF - Degrees of Freedom for the BCV inverse chi-squared
+            distribution.
+    }
+  \item dropout (dropout parameters)
+    \itemize{
+      \item present - Logical. Whether to simulate dropout.
+      \item mid - Midpoint parameter for the dropout logistic function.
+      \item shape - Shape parameter for the dropout logistic function.
+    }
+  \item path (differentiation path parameters)
+    \itemize{
+      \item [from] - Vector giving the originating point of each path. This
+            allows path structure such as a cell type which differentiates
+            into an intermediate cell type that then differentiates into two
+            mature cell types. A path structure of this form would have a
+            "from" parameter of c(0, 1, 1) (where 0 is the origin). If no
+            vector is given all paths will start at the origin.
+      \item [length] - Vector giving the number of steps to simulate along
+            each path. If a single value is given it will be applied to all
+            paths.
+      \item [skew] - Vector giving the skew of each path. Values closer to 1
+            will give more cells towards the starting population, values
+            closer to 0 will give more cells towards the final population.
+            If a single value is given it will be applied to all paths.
+      \item [nonlinearProb] - Probability that a gene follows a non-linear
+            path along the differentiation path. This allows more complex
+            gene patterns such as a gene being equally expressed at the
+            beginning and end of a path but lowly expressed in the middle.
+      \item [sigmaFac] - Sigma factor for non-linear gene paths. A higher
+            value will result in more extreme non-linear variations along a
+            path.
+  }
+}
+
+Those shown in brackets cannot currently be easily estimated from a real
+dataset by Splatter. This is also shown when a splatParams object is printed
+with parameters labelled as either (estimatable) or [not estimatable].
+}
+\examples{
+params <- splatParams()
+params
+}
+
diff --git a/man/splatter.Rd b/man/splatter.Rd
new file mode 100644
index 0000000..beda507
--- /dev/null
+++ b/man/splatter.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splatter-package.R
+\docType{package}
+\name{splatter}
+\alias{splatter}
+\alias{splatter-package}
+\title{splatter.}
+\description{
+\pkg{splatter} is a package for the well-documented and reproducible
+simulation of single-cell RNA-seq count data.
+}
+\details{
+As well as its own simulation model \pkg{splatter} provides functions for
+the estimation of model parameters.
+}
+
diff --git a/man/updateParams.Rd b/man/updateParams.Rd
new file mode 100644
index 0000000..965ac85
--- /dev/null
+++ b/man/updateParams.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/params.R
+\name{updateParams}
+\alias{updateParams}
+\title{Update a splatParams object}
+\usage{
+updateParams(params, ...)
+}
+\arguments{
+\item{params}{the splatParams object to update.}
+
+\item{...}{Any parameters to update.}
+}
+\value{
+splatParams object with updated parameters
+}
+\description{
+Update any of the parameters in a splatParams object to have a new value.
+}
+\details{
+This function allows multiple parameters to be updated or set using a single
+simple function call. Parameters to update are specified by supplying
+additional arguments that follow the levels of the splatParams data structure
+separated by the "." character. For example
+\code{updateParams(params, nGenes = 100)} is equivalent to
+\code{params$nGenes <- 100} and \code{updateParams(params, mean.rate = 1)} is
+equivalent to \code{params$mean$rate <- 1}. For more details of the available
+parameters and the splatParams data structure see \code{\link{splatParams}}.
+}
+\examples{
+params <- defaultParams()
+params
+# Set nGenes and nCells
+params <- updateParams(params, nGenes = 1000, nCells = 200)
+params
+# Set mean rate parameter and library size location parameter
+params <- updateParams(params, mean.rate = 1, lib.loc = 12)
+params
+}
+
-- 
GitLab