From 504b1d0b97143f55a7b55cf55fc04b171d46c1d4 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Wed, 5 Oct 2016 12:26:20 +1100 Subject: [PATCH] Add params functions --- NAMESPACE | 5 + R/params.R | 267 +++++++++++++++++++++++++++++++++++++++ man/defaultParams.Rd | 20 +++ man/mergeParams.Rd | 29 +++++ man/print.splatParams.Rd | 22 ++++ man/splatParams.Rd | 106 ++++++++++++++++ man/splatter.Rd | 16 +++ man/updateParams.Rd | 40 ++++++ 8 files changed, 505 insertions(+) create mode 100644 R/params.R create mode 100644 man/defaultParams.Rd create mode 100644 man/mergeParams.Rd create mode 100644 man/print.splatParams.Rd create mode 100644 man/splatParams.Rd create mode 100644 man/splatter.Rd create mode 100644 man/updateParams.Rd diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..7824ce6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(print,splatParams) +export(defaultParams) +export(mergeParams) +export(splatParams) +export(updateParams) diff --git a/R/params.R b/R/params.R new file mode 100644 index 0000000..9e8b80b --- /dev/null +++ b/R/params.R @@ -0,0 +1,267 @@ +#' Create splatParams object +#' +#' S3 class for holding Splatter simulation parameters. +#' +#' @param ... parameters to set in the new params object, passed to +#' \code{\link{updateParams}}. +#' +#' @details +#' The splatParams object is a list based S3 object for holding simulation +#' parameters. It has the following sections and values: +#' +#' \itemize{ +#' \item nGenes - Number of genes to simulate. +#' \item nCells - Number of cells to simulate. +#' \item [groupCells] - Vector giving the number of cells in each simulation +#' group/path. +#' \item mean (mean parameters) +#' \itemize{ +#' \item rate - Rate parameter for the mean gamma simulation. +#' \item shape - Shape parameter for the mean gamma simulation. 
+#' } +#' \item lib (library size parameters) +#' \itemize{ +#' \item loc - Location (meanlog) parameter for the library size +#' log-normal distribution. +#' \item scale - Scale (sdlog) parameter for the library size log-normal +#' distribution. +#' } +#' \item out (expression outlier parameters) +#' \itemize{ +#' \item prob - Probability that a gene is an expression outlier. +#' \item loProb - Probability that an expression outlier gene is lowly +#' expressed. +#' \item facLoc - Location (meanlog) parameter for the expression outlier +#' factor log-normal distribution. +#' \item facScale - Scale (sdlog) parameter for the expression outlier +#' factor log-normal distribution. +#' } +#' \item de (differential expression parameters) +#' \itemize{ +#' \item [prob] - Probability that a gene is differentially expressed +#' between groups or paths. +#' \item [downProb] - Probability that differentially expressed gene is +#' down-regulated. +#' \item [facLoc] - Location (meanlog) parameter for the differential +#' expression factor log-normal distribution. +#' \item [facScale] - Scale (sdlog) parameter for the differential +#' expression factor log-normal distribution. +#' } +#' \item bcv (Biological Coefficient of Variation parameters) +#' \itemize{ +#' \item common - Underlying common dispersion across all genes. +#' \item DF - Degrees of Freedom for the BCV inverse chi-squared +#' distribution. +#' } +#' \item dropout (dropout parameters) +#' \itemize{ +#' \item present - Logical. Whether to simulate dropout. +#' \item mid - Midpoint parameter for the dropout logistic function. +#' \item shape - Shape parameter for the dropout logistic function. +#' } +#' \item path (differentiation path parameters) +#' \itemize{ +#' \item [from] - Vector giving the originating point of each path. This +#' allows path structure such as a cell type which differentiates +#' into an intermediate cell type that then differentiates into two +#' mature cell types. 
A path structure of this form would have a +#' "from" parameter of c(0, 1, 1) (where 0 is the origin). If no +#' vector is given all paths will start at the origin. +#' \item [length] - Vector giving the number of steps to simulate along +#' each path. If a single value is given it will be applied to all +#' paths. +#' \item [skew] - Vector giving the skew of each path. Values closer to 1 +#' will give more cells towards the starting population, values +#' closer to 0 will give more cells towards the final population. +#' If a single value is given it will be applied to all paths. +#' \item [nonlinearProb] - Probability that a gene follows a non-linear +#' path along the differentiation path. This allows more complex +#' gene patterns such as a gene being equally expressed at the +#' beginning and end of a path but lowly expressed in the middle. +#' \item [sigmaFac] - Sigma factor for non-linear gene paths. A higher +#' value will result in more extreme non-linear variations along a +#' path. +#' } +#' } +#' +#' Those shown in brackets cannot currently be easily estimated from a real +#' dataset by Splatter. This is also shown when a splatParams object is printed +#' with parameters labelled as either (estimatable) or [not estimatable]. +#' +#' @return List based S3 splatParams object +#' @examples +#' params <- splatParams() +#' params +#' @export +splatParams <- function(...) { + params <- list(nGenes = NA, nCells = NA, groupCells = NA, + mean = list(rate = NA, shape = NA), + lib = list(loc = NA, scale = NA), + out = list(prob = NA, loProb = NA, facLoc = NA, + facScale = NA), + de = list(prob = NA, downProb = NA, facLoc = NA, + facScale = NA), + bcv = list(common = NA, DF = NA), + dropout = list(present = NA, mid = NA, shape = NA), + path = list(from = NA, length = NA, skew = NA, + nonlinearProb = NA, sigmaFac = NA)) + + class(params) <- "splatParams" + + params <- updateParams(params, ...) 
+ + return(params) +} + +#' Print splatParams object +#' +#' Pretty print the parameters stored in a splatParams object. Parameters are +#' labelled as either (estimatable) or [not estimatable]. +#' +#' @param x splatParams object to print. +#' @param ... further arguments passed to or from other methods. +#' +#' @examples +#' params <- defaultParams() +#' print(params) +#' @export +print.splatParams <- function(x, ...) { + + pp <- list("Global:" = c("(Genes)" = x$nGenes, "(Cells)" = x$nCells, + "[Group Cells]" = x$groupCells), + "Mean:" = c("(Rate)" = x$mean$rate, "(Shape)" = x$mean$shape), + "Library size:" = c("(Location)" = x$lib$loc, + "(Scale)" = x$lib$scale), + "Expression outliers:" = c("(Probability)" = x$out$prob, + "(Lo Probability)" = x$out$loProb, + "(Location)" = x$out$facLoc, + "(Scale)" = x$out$facScale), + "Differential expression:" = c("[Probability]" = x$de$prob, + "[Down Prob]" = x$de$downProb, + "[Location]" = x$de$facLoc, + "[Scale]" = x$de$facScale), + "BCV:" = c("(Common Disp)" = x$bcv$common, + "(Degrees of Freedom)" = x$bcv$DF), + "Dropout:" = c("(Present T/F)" = x$dropout$present, + "(Midpoint)" = x$dropout$mid, + "(Shape)" = x$dropout$shape), + "Paths:" = c("[From]" = x$path$from, "[Length]" = x$path$length, + "[Skew]" = x$path$skew, + "[Non-linear Prob]" = x$path$nonlinearProb, + "[Sigma Factor]" = x$path$sigmaFac)) + + for (category in names(pp)) { + cat(category, "\n") + print.default(pp[[category]], print.gap = 2) + cat("\n") + } +} + +#' Update a splatParams object +#' +#' Update any of the parameters in a splatParams object to have a new value. +#' +#' @param params the splatParams object to update. +#' @param ... Any parameters to update. +#' +#' @details +#' This function allows multiple parameters to be updated or set using a single +#' simple function call. Parameters to update are specified by supplying +#' additional arguments that follow the levels of the splatParams data structure +#' separated by the "." character. 
For example +#' \code{updateParams(params, nGenes = 100)} is equivalent to +#' \code{params$nGenes <- 100} and \code{updateParams(params, mean.rate = 1)} is +#' equivalent to \code{params$mean$rate <- 1}. For more details of the available +#' parameters and the splatParams data structure see \code{\link{splatParams}}. +#' +#' @return splatParams object with updated parameters +#' @examples +#' params <- defaultParams() +#' params +#' # Set nGenes and nCells +#' params <- updateParams(params, nGenes = 1000, nCells = 200) +#' params +#' # Set mean rate parameter and library size location parameter +#' params <- updateParams(params, mean.rate = 1, lib.loc = 12) +#' params +#' @export +updateParams <- function(params, ...) { + + update <- list(...) + + if (length(update) == 0) { + return(params) + } + + update.names <- strsplit(names(update), ".", fixed = TRUE) + + for (idx in 1:length(update)) { + update.name <- update.names[[idx]] + value <- update[[idx]] + if (length(update.name) == 1) { + params[update.name] <- value + } else { + params[[update.name[1]]][update.name[2]] <- value + } + } + + return(params) +} + +#' Merge two splatParams objects +#' +#' Merge two splatParams objects. Any parameters that are NA in the first +#' splatParams object are replaced by the value in the second splatParams +#' object. +#' +#' @param params1 first splatParams object to merge. +#' @param params2 second splatParams object to merge. +#' +#' @return Merged splatParams object. 
+#' @examples +#' params <- splatParams(nGenes = 1000, nCells = 50) +#' params +#' # Replace unset parameters with default parameters +#' params <- mergeParams(params, defaultParams()) +#' params +#' @export +mergeParams <- function(params1, params2) { + + for (i in 1:length(params1)) { + for (j in 1:length(params1[[i]])) { + if (is.na(params1[[i]][[j]])) { + params1[[i]][[j]] <- params2[[i]][[j]] + } + } + } + + return(params1) +} + +#' Get default simulation parameters +#' +#' Get a splatParams object with a set of default parameters that will produce a +#' reasonable simulation of single-cell RNA-seq count data. +#' +#' @return A splatParams object containing default parameters +#' @examples +#' params <- defaultParams() +#' params +#' @export +defaultParams <- function() { + + params <- splatParams() + + params <- updateParams(params, nGenes = 10000, nCells = 100, + groupCells = 100, mean.rate = 0.3, mean.shape = 0.4, + lib.loc = 10, lib.scale = 0.5, out.prob = 0.1, + out.loProb = 0.5, out.facLoc = 4, out.facScale = 1, + de.prob = 0.1, de.downProb = 0.5, de.facLoc = 4, + de.facScale = 1, bcv.common = 0.1, bcv.DF = 25, + dropout.present = TRUE, dropout.mid = 0, + dropout.shape = -1, path.from = 0, + path.length = 100, path.skew = 0.5, + path.nonlinearProb = 0.1, path.sigmaFac = 0.8) + + return(params) +} \ No newline at end of file diff --git a/man/defaultParams.Rd b/man/defaultParams.Rd new file mode 100644 index 0000000..18c9e43 --- /dev/null +++ b/man/defaultParams.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{defaultParams} +\alias{defaultParams} +\title{Get default simulation parameters} +\usage{ +defaultParams() +} +\value{ +A splatParams object containing default parameters +} +\description{ +Get a splatParams object with a set of default parameters that will produce a +reasonable simulation of single-cell RNA-seq count data. 
+} +\examples{ +params <- defaultParams() +params +} + diff --git a/man/mergeParams.Rd b/man/mergeParams.Rd new file mode 100644 index 0000000..b8b5f53 --- /dev/null +++ b/man/mergeParams.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{mergeParams} +\alias{mergeParams} +\title{Merge two splatParams objects} +\usage{ +mergeParams(params1, params2) +} +\arguments{ +\item{params1}{first splatParams object to merge.} + +\item{params2}{second splatParams object to merge.} +} +\value{ +Merged splatParams object. +} +\description{ +Merge two splatParams objects. Any parameters that are NA in the first +splatParams object are replaced by the value in the second splatParams +object. +} +\examples{ +params <- splatParams(nGenes = 1000, nCells = 50) +params +# Replace unset parameters with default parameters +params <- mergeParams(params, defaultParams()) +params +} + diff --git a/man/print.splatParams.Rd b/man/print.splatParams.Rd new file mode 100644 index 0000000..fca0f4a --- /dev/null +++ b/man/print.splatParams.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{print.splatParams} +\alias{print.splatParams} +\title{Print splatParams object} +\usage{ +\method{print}{splatParams}(x, ...) +} +\arguments{ +\item{x}{splatParams object to print.} + +\item{...}{further arguments passed to or from other methods.} +} +\description{ +Pretty print the parameters stored in a splatParams object. Parameters are +labelled as either (estimatable) or [not estimatable]. 
+} +\examples{ +params <- defaultParams() +print(params) +} + diff --git a/man/splatParams.Rd b/man/splatParams.Rd new file mode 100644 index 0000000..e1bf1b4 --- /dev/null +++ b/man/splatParams.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{splatParams} +\alias{splatParams} +\title{Create splatParams object} +\usage{ +splatParams(...) +} +\arguments{ +\item{...}{parameters to set in the new params object, passed to +\code{\link{updateParams}}.} +} +\value{ +List based S3 splatParams object +} +\description{ +S3 class for holding Splatter simulation parameters. +} +\details{ +The splatParams object is a list based S3 object for holding simulation +parameters. It has the following sections and values: + +\itemize{ + \item nGenes - Number of genes to simulate. + \item nCells - Number of cells to simulate. + \item [groupCells] - Vector giving the number of cells in each simulation + group/path. + \item mean (mean parameters) + \itemize{ + \item rate - Rate parameter for the mean gamma simulation. + \item shape - Shape parameter for the mean gamma simulation. + } + \item lib (library size parameters) + \itemize{ + \item loc - Location (meanlog) parameter for the library size + log-normal distribution. + \item scale - Scale (sdlog) parameter for the library size log-normal + distribution. + } + \item out (expression outlier parameters) + \itemize{ + \item prob - Probability that a gene is an expression outlier. + \item loProb - Probability that an expression outlier gene is lowly + expressed. + \item facLoc - Location (meanlog) parameter for the expression outlier + factor log-normal distribution. + \item facScale - Scale (sdlog) parameter for the expression outlier + factor log-normal distribution. + } + \item de (differential expression parameters) + \itemize{ + \item [prob] - Probability that a gene is differentially expressed + between groups or paths. 
+ \item [downProb] - Probability that differentially expressed gene is + down-regulated. + \item [facLoc] - Location (meanlog) parameter for the differential + expression factor log-normal distribution. + \item [facScale] - Scale (sdlog) parameter for the differential + expression factor log-normal distribution. + } + \item bcv (Biological Coefficient of Variation parameters) + \itemize{ + \item common - Underlying common dispersion across all genes. + \item DF - Degrees of Freedom for the BCV inverse chi-squared + distribution. + } + \item dropout (dropout parameters) + \itemize{ + \item present - Logical. Whether to simulate dropout. + \item mid - Midpoint parameter for the dropout logistic function. + \item shape - Shape parameter for the dropout logistic function. + } + \item path (differentiation path parameters) + \itemize{ + \item [from] - Vector giving the originating point of each path. This + allows path structure such as a cell type which differentiates + into an intermediate cell type that then differentiates into two + mature cell types. A path structure of this form would have a + "from" parameter of c(0, 1, 1) (where 0 is the origin). If no + vector is given all paths will start at the origin. + \item [length] - Vector giving the number of steps to simulate along + each path. If a single value is given it will be applied to all + paths. + \item [skew] - Vector giving the skew of each path. Values closer to 1 + will give more cells towards the starting population, values + closer to 0 will give more cells towards the final population. + If a single value is given it will be applied to all paths. + \item [nonlinearProb] - Probability that a gene follows a non-linear + path along the differentiation path. This allows more complex + gene patterns such as a gene being equally expressed at the + beginning and end of a path but lowly expressed in the middle. + \item [sigmaFac] - Sigma factor for non-linear gene paths. 
A higher + value will result in more extreme non-linear variations along a + path. + } +} + +Those shown in brackets cannot currently be easily estimated from a real +dataset by Splatter. This is also shown when a splatParams object is printed +with parameters labelled as either (estimatable) or [not estimatable]. +} +\examples{ +params <- splatParams() +params +} + diff --git a/man/splatter.Rd b/man/splatter.Rd new file mode 100644 index 0000000..beda507 --- /dev/null +++ b/man/splatter.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splatter-package.R +\docType{package} +\name{splatter} +\alias{splatter} +\alias{splatter-package} +\title{splatter.} +\description{ +\pkg{splatter} is a package for the well-documented and reproducible +simulation of single-cell RNA-seq count data. +} +\details{ +As well as its own simulation model \pkg{splatter} provides functions for +the estimation of model parameters. +} + diff --git a/man/updateParams.Rd b/man/updateParams.Rd new file mode 100644 index 0000000..965ac85 --- /dev/null +++ b/man/updateParams.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{updateParams} +\alias{updateParams} +\title{Update a splatParams object} +\usage{ +updateParams(params, ...) +} +\arguments{ +\item{params}{the splatParams object to update.} + +\item{...}{Any parameters to update.} +} +\value{ +splatParams object with updated parameters +} +\description{ +Update any of the parameters in a splatParams object to have a new value. +} +\details{ +This function allows multiple parameters to be updated or set using a single +simple function call. Parameters to update are specified by supplying +additional arguments that follow the levels of the splatParams data structure +separated by the "." character. 
For example +\code{updateParams(params, nGenes = 100)} is equivalent to +\code{params$nGenes <- 100} and \code{updateParams(params, mean.rate = 1)} is +equivalent to \code{params$mean$rate <- 1}. For more details of the available +parameters and the splatParams data structure see \code{\link{splatParams}}. +} +\examples{ +params <- defaultParams() +params +# Set nGenes and nCells +params <- updateParams(params, nGenes = 1000, nCells = 200) +params +# Set mean rate parameter and library size location parameter +params <- updateParams(params, mean.rate = 1, lib.loc = 12) +params +} + -- GitLab