diff --git a/DESCRIPTION b/DESCRIPTION index 4741a42c495e947b8c0168472dc56e4c139024f2..463be27de6d5b5cf7ccd000ffcc20ca0c029ec3d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.7.9016 -Date: 2019-10-01 +Version: 1.9.7.9017 +Date: 2019-10-09 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 08a786f5248056391624209b122e8d5908e99d35..98b13729d5c9bbd1bdb2876da4d2cca055b764a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,6 +67,16 @@ export(splatSimulateGroups) export(splatSimulatePaths) export(splatSimulateSingle) export(splotchEstimate) +export(splotchGenNetwork) +export(splotchSample) +export(splotchSelectRegs) +export(splotchSetup) +export(splotchSimAmbientCounts) +export(splotchSimCellCounts) +export(splotchSimCellMeans) +export(splotchSimCounts) +export(splotchSimGeneMeans) +export(splotchSimLibSizes) export(splotchSimulate) export(summariseDiff) export(zinbEstimate) diff --git a/NEWS.md b/NEWS.md index 43dd6ea16a8847de26db27df6a2c739b12289f1a..938037b237e9259b1dd622cc839e414d5cb3b0fd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +### Version 1.9.7.9017 (2019-10-09) + +* Document Splotch simulation functions + ### Version 1.9.7.9016 (2019-10-01) * Change the default SplotchParams cell design to place cells at end of path diff --git a/R/AllClasses.R b/R/AllClasses.R index d53646c66cb67edd8651d9cfd52d459d9984fd66..50ff73b95091b90c7331d4d443253e199635d6a3 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -307,7 +307,7 @@ setClass("SplatParams", #' \describe{ #' \item{\code{[paths.programs]}}{Number of expression programs.} #' \item{\code{[paths.design]}}{data.frame describing path -#' structure.} +#' structure. 
See \code{\link{splotchSimPaths}} for details.} #' } #' } #' \item{\emph{Library size parameters}}{ @@ -328,7 +328,7 @@ setClass("SplatParams", #' \item{\emph{Design parameters}}{ #' \describe{ #' \item{\code{[cells.design]}}{data.frame describing cell -#' structure.} +#' structure. See \code{\link{splotchSimCellMeans}} for details.} #' } #' } #' \item{\emph{Doublet parameters}}{ diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index e8bc02604394e45470bbda31009ef16364f1dd00..de38b02ae2abb27455dd2472693d45b3b5c9f307 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -13,7 +13,8 @@ newSplotchParams <- function(...) { msg <- paste("The Splotch simulation is still experimental and may produce", "unreliable results. Please try it and report any issues to", - "https://github.com/Oshlack/splatter/issues.") + "https://github.com/Oshlack/splatter/issues. The development + version may have improved features.") rlang:::warn_deprecated(msg, id = "warn.splotch") params <- new("SplotchParams") diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index 8096a1f21ffc6bfef3c0d30757ea1cd9073cbc6f..1b7318303fd69bb9c7df5e55d828141fe59f5845 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -1,6 +1,6 @@ #' Splotch simulation #' -#' Simulate counts from... +#' Simulate scRNA-seq count data using the Splotch model #' #' @param params SplotchParams object containing simulation parameters. #' @param verbose logical. Whether to print progress messages @@ -8,14 +8,22 @@ #' \code{params}. #' #' @details -#' Details... #' -#' @return SingleCellExperiment containing simulated counts +#' This functions is for simulating data in a single step. It consists of a +#' call to \code{\link{splotchSetup}} followed by a call to +#' \code{\link{splotchSample}}. Please see the documentation for those functions +#' for more details of the individual steps. 
+#' +#' @seealso +#' \code{\link{splotchSetup}}, \code{\link{splotchSample}} +#' +#' @return SingleCellExperiment containing simulated counts and intermediate +#' values +#' #' @examples #' sim <- splotchSimulate() #' #' @export -#' @importFrom SingleCellExperiment SingleCellExperiment splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { params <- splotchSetup(params, verbose, ...) @@ -24,6 +32,53 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { return(sim) } +#' Splotch setup +#' +#' Set up the parameters required for the Splotch simulation +#' +#' @param params SplotchParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' @param ... any additional parameter settings to override what is provided in +#' \code{params}. +#' +#' @details +#' The first stage in a two-step Splotch simulation is to generate some of the +#' intermediate parameters. The resulting parameters allow multiple simulated +#' datasets to be generated from the same biological structure (using +#' \code{\link{splotchSample}}). As with all the other parameters these values +#' can be manually overwritten if desired. +#' +#' The setup involves the following steps: +#' \enumerate{ +#' \item Generate a gene network (if not already present) +#' \item Select regulator genes (if not already present) +#' \item Simulate gene means (if not already present) +#' \item Simulate cell paths +#' } +#' +#' The resulting \code{\link{SplotchParams}} object will have the following +#' parameters set (if they weren't already). +#' +#' \itemize{ +#' \item \code{mean.values} +#' \item \code{network.graph} +#' \item \code{network.regsSet} +#' \item \code{paths.means} +#' } +#' +#' See \code{\link{SplotchParams}} for more details about these parameters and +#' the functions for the individual steps for more details about the process.
+#' +#' @seealso +#' \code{\link{splotchGenNetwork}}, \code{\link{splotchSelectRegs}}, +#' \code{\link{splotchSimGeneMeans}}, \code{\link{splotchSimPaths}}, +#' \code{\link{SplotchParams}} +#' +#' @return A complete SplotchParams object +#' @export +#' +#' @examples +#' params <- splotchSetup() splotchSetup <- function(params = newSplotchParams(), verbose = TRUE, ...) { checkmate::assertClass(params, "SplotchParams") @@ -42,6 +97,90 @@ splotchSetup <- function(params = newSplotchParams(), verbose = TRUE, ...) { return(params) } +#' Splotch sample +#' +#' Sample cells for the Splotch simulation +#' +#' @param params SplotchParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' The second stage in a two-step Splotch simulation is to generate cells based +#' on a complete \code{\link{SplotchParams}} object that already contains the +#' intermediate parameters. +#' +#' The sampling process involves the following steps: +#' \enumerate{ +#' \item Simulate library sizes for each cell +#' \item Simulate means for each cell +#' \item Simulate endogenous counts for each cell +#' \item Simulate ambient counts for each cell +#' \item Simulate final counts for each cell +#' } +#' +#' The final output is a +#' \code{\link[SingleCellExperiment]{SingleCellExperiment}} object that +#' contains the simulated counts but also the values for various intermediate +#' steps. These are stored in the \code{\link{colData}} (for cell specific +#' information), \code{\link{rowData}} (for gene specific information) or +#' \code{\link{assays}} (for gene by cell matrices) slots.
This additional +#' information includes: +#' \describe{ +#' \item{\code{colData}}{ +#' \describe{ +#' \item{Cell}{Unique cell identifier.} +#' \item{Type}{Whether the cell is a Cell, Doublet or Empty.} +#' \item{CellLibSize}{The expected number of endogenous counts for +#' that cell.} +#' \item{AmbientLibSize}{The expected number of ambient counts for +#' that cell.} +#' \item{Path}{The path the cell belongs to.} +#' \item{Step}{How far along the path each cell is.} +#' \item{Path1}{For doublets the path of the first partner in the +#' doublet (otherwise \code{NA}).} +#' \item{Step1}{For doublets the step of the first partner in the +#' doublet (otherwise \code{NA}).} +#' \item{Path2}{For doublets the path of the second partner in the +#' doublet (otherwise \code{NA}).} +#' \item{Step2}{For doublets the step of the second partner in the +#' doublet (otherwise \code{NA}).} +#' } +#' } +#' \item{\code{rowData}}{ +#' \describe{ +#' \item{Gene}{Unique gene identifier.} +#' \item{BaseMean}{The base expression level for that gene.} +#' \item{AmbientMean}{The ambient expression level for that gene.} +#' } +#' } +#' \item{\code{assays}}{ +#' \describe{ +#' \item{CellMeans}{The mean expression of genes in each cell +#' after any differential expression and adjusted for expected +#' library size.} +#' \item{CellCounts}{Endogenous count matrix.} +#' \item{AmbientCounts}{Ambient count matrix.} +#' \item{counts}{Final count matrix.} +#' } +#' } +#' } +#' +#' Values that have been added by Splatter are named using \code{UpperCamelCase} +#' in order to differentiate them from the values added by analysis packages +#' which typically use \code{underscore_naming}. +#' +#' @seealso +#' \code{\link{splotchSimLibSizes}}, \code{\link{splotchSimCellMeans}}, +#' \code{\link{splotchSimCellCounts}}, \code{\link{splotchSimAmbientCounts}}, +#' \code{\link{splotchSimCounts}} +#' +#' @return SingleCellExperiment object containing the simulated counts and +#' intermediate values. 
+#' @export +#' +#' @examples +#' params <- splotchSetup() +#' sim <- splotchSample(params) splotchSample <- function(params, verbose = TRUE) { # Check that parameters are set up @@ -104,6 +243,20 @@ splotchSample <- function(params, verbose = TRUE) { } +#' Generate Splotch gene network +#' +#' Generate a gene network for the Splotch simulation +#' +#' @param params SplotchParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Currently a very simple approach is used which needs to be improved. A +#' network is generated using the \code{\link[igraph]{sample_forestfire}} +#' function and edge weights are sampled from a standard normal distribution. +#' +#' @return SplotchParams object with gene network +#' @export splotchGenNetwork <- function(params, verbose) { nGenes <- getParam(params, "nGenes") @@ -127,6 +280,20 @@ splotchGenNetwork <- function(params, verbose) { return(params) } +#' Select Splotch regulators +#' +#' Select regulator genes in the gene network for a Splotch simulation +#' +#' @param params SplotchParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Regulators are randomly selected, weighted according to the difference +#' between their out degree and in degree. This is an arbitrary weighting and +#' may be improved or replaced in the future. +#' +#' @return SplotchParams object with gene regulators +#' @export splotchSelectRegs <- function(params, verbose) { network.regsSet <- getParam(params, "network.regsSet") @@ -156,6 +323,28 @@ splotchSelectRegs <- function(params, verbose) { return(params) } +#' Simulate Splotch gene means +#' +#' @param params SplotchParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Gene means are simulated in one of two ways depending on the value of the +#' \code{mean.method} parameter.
+#' +#' If \code{mean.method} is "fit" (default) then means are sampled from a Gamma +#' distribution with shape equal to \code{mean.shape} and rate equal to +#' \code{mean.rate}. Expression outliers are then added by replacing some +#' values with the median multiplied by a factor from a log-normal distribution. +#' This is the same process used for the Splat simulation. +#' +#' If \code{mean.method} is "density" then means are sampled from the +#' density object in the \code{mean.density} parameter using a rejection +#' sampling method. This approach is more flexible but may violate some +#' statistical assumptions. +#' +#' @return SplotchParams object with gene means +#' @export splotchSimGeneMeans <- function(params, verbose) { mean.values <- getParam(params, "mean.values") @@ -198,6 +387,43 @@ splotchSimGeneMeans <- function(params, verbose) { return(params) } +#' Simulate Splotch paths +#' +#' Simulate gene means for each step along each path of a Splotch simulation +#' +#' @param params SplotchParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' The method of simulating paths is inspired by the method used in the PROSSTT +#' simulation. Changes in expression are controlled by \code{paths.nPrograms} +#' regulatory programs. Each of the regulatory genes in the gene network has +#' some association with each program. This is analogous to there being changes +#' in the environment (the programs) which are sensed by receptors (regulatory +#' genes) and cause changes in expression downstream. For each path a random +#' walk is generated for each program and the changes passed on to the +#' regulatory genes. At each step the changes propagate through the network +#' according to the weights on edges between genes. This algorithm is fairly +#' simple but should result in correlation relationships between genes. However +#' it is likely to be improved and adjusted in the future.
+#' +#' The path structure itself is specified by the \code{paths.design} parameter. +#' This is a \code{data.frame} with three columns: "Path", "From", and "Steps". +#' The Path field is an ID for each path while the Steps field controls the +#' length of each path. Increasing the number of steps will increase the +#' difference in expression between the ends of the paths. The From field sets +#' the originating point of each path. For example a From of \code{0, 0, 0} +#' would indicate three paths from the origin while a From of \code{0, 1, 1} +#' would give a branching structure with Path 1 beginning at the origin and +#' Path 2 and Path 3 beginning at the end of Path 1. +#' +#' @references +#' +#' Papadopoulos N, Parra RG, Söding J. PROSSTT: probabilistic simulation of +#' single-cell RNA-seq data for complex differentiation processes. +#' Bioinformatics (2019). \url{https://doi.org/10.1093/bioinformatics/btz078}. +#' +#' @return SplotchParams object with path means splotchSimPaths <- function(params, verbose) { paths.means <- getParam(params, "paths.means") @@ -270,6 +496,33 @@ splotchSimPaths <- function(params, verbose) { return(params) } +#' Simulate Splotch library sizes +#' +#' Generate library sizes for cells in the Splotch simulation +#' +#' @param sim SingleCellExperiment containing simulation. +#' @param params SplotchParams object with simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Library sizes are simulated in one of two ways depending on the value of the +#' \code{lib.method} parameter. +#' +#' If \code{lib.method} is "fit" (default) then library sizes are sampled from a +#' log-normal distribution with meanlog equal to \code{lib.loc} and sdlog equal +#' to \code{lib.scale}. +#' +#' If \code{lib.method} is "density" then library sizes are sampled from the +#' density object in the \code{lib.density} parameter using a rejection +#' sampling method.
This approach is more flexible but may violate some +#' statistical assumptions. +#' +#' Ambient library sizes are also generated from a log-normal distribution based +#' on the parameters for the cell library size and adjusted using the +#' \code{ambient.scale} parameter. +#' +#' @return SingleCellExperiment with library sizes +#' @export splotchSimLibSizes <- function(sim, params, verbose) { if (verbose) {message("Simulating library sizes...")} @@ -306,7 +559,58 @@ splotchSimLibSizes <- function(sim, params, verbose) { return(sim) } -#' @importFrom stats dbeta +#' Simulate Splotch cell means +#' +#' Simulate endogenous means for each cell in a Splotch simulation +#' +#' @param sim SingleCellExperiment containing simulation. +#' @param params SplotchParams object with simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Cells are first assigned to a path and a step along that path. This is +#' controlled by the \code{cells.design} parameter which is a \code{data.frame} +#' with the columns "Path", "Probability", "Alpha" and "Beta". The Path field +#' is an ID for each path and the Probability field is the probability that a +#' cell will come from that path (must sum to 1). The Alpha and Beta parameters +#' control the density of cells along the path. After they are assigned to paths +#' the step for each cell is sampled from a Beta distribution with parameters +#' shape1 = Alpha and shape2 = Beta. This approach is very flexible +#' and allows almost any distribution of cells along a path. The distribution +#' can be viewed using \code{hist(rbeta(10000, Alpha, Beta), breaks = 100)}.
+#' Some useful combinations of parameters are: +#' +#' \describe{ +#' \item{\code{Alpha = 1}, \code{Beta = 1}}{Uniform distribution along the +#' path} +#' \item{\code{Alpha = 0}, \code{Beta = 1}}{All cells at the start of the +#' path.} +#' \item{\code{Alpha = 1}, \code{Beta = 0}}{All cells at the end of the +#' path.} +#' \item{\code{Alpha = 0}, \code{Beta = 0}}{Cells only at each end of the +#' path.} +#' \item{\code{Alpha = 1}, \code{Beta = 2}}{Linear skew towards the start +#' of the path} +#' \item{\code{Alpha = 0.5}, \code{Beta = 1}}{Curved skew towards the start +#' of the path} +#' \item{\code{Alpha = 2}, \code{Beta = 1}}{Linear skew towards the end +#' of the path} +#' \item{\code{Alpha = 1}, \code{Beta = 0.5}}{Curved skew towards the end +#' of the path} +#' \item{\code{Alpha = 0.5}, \code{Beta = 0.5}}{Curved skew towards both +#' ends of the path} +#' \item{\code{Alpha = 2}, \code{Beta = 2}}{Curved skew away from both +#' ends of the path} +#' } +#' +#' Once cells are assigned to paths and steps the correct means are extracted +#' from the \code{paths.means} parameter and adjusted based on each cell's +#' library size. An adjustment for BCV is then applied. Doublets are also +#' simulated at this stage by selecting two path/step combinations and averaging +#' the means. +#' +#' @return SingleCellExperiment with cell means +#' @export splotchSimCellMeans <- function(sim, params, verbose) { cell.names <- colData(sim)$Cell @@ -435,6 +739,20 @@ splotchSimCellMeans <- function(sim, params, verbose) { return(sim) } +#' Simulate Splotch cell counts +#' +#' Simulate cell counts for the Splotch simulation +#' +#' @param sim SingleCellExperiment containing simulation. +#' @param params SplotchParams object with simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Counts are sampled from a Poisson distribution with lambda equal to the +#' cell means matrix.
+#' +#' @return SingleCellExperiment with cell counts +#' @export splotchSimCellCounts <- function(sim, params, verbose) { if (verbose) {message("Simulating cell counts...")} @@ -457,6 +775,20 @@ splotchSimCellCounts <- function(sim, params, verbose) { return(sim) } +#' Simulate Splotch ambient counts +#' +#' @param sim SingleCellExperiment containing simulation. +#' @param params SplotchParams object with simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' The overall expression profile is calculated by averaging the cell counts +#' of the (non-empty) cells. This is then multiplied by the ambient library +#' sizes to get a mean for each cell. Counts are then sampled from a Poisson +#' distribution using these means. +#' +#' @return SingleCellExperiment with ambient counts +#' @export splotchSimAmbientCounts <- function(sim, params, verbose) { if (verbose) {message("Simulating ambient counts...")} @@ -487,6 +819,24 @@ splotchSimAmbientCounts <- function(sim, params, verbose) { return(sim) } +#' Simulate Splotch final counts +#' +#' Simulate the final counts matrix for a Splotch simulation +#' +#' @param sim SingleCellExperiment containing simulation. +#' @param params SplotchParams object with simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' The cell counts matrix and ambient counts matrix are added together. The +#' result is then downsampled to the cell library size (for cells and doublets) +#' or the ambient library size (for empty cells) using the +#' \code{\link[DropletUtils]{downsampleMatrix}} function.
+#' +#' @seealso \code{\link[DropletUtils]{downsampleMatrix}} +#' +#' @return SingleCellExperiment with counts matrix +#' @export splotchSimCounts <- function(sim, params, verbose) { if (verbose) {message("Simulating final counts...")} @@ -512,6 +862,20 @@ splotchSimCounts <- function(sim, params, verbose) { return(sim) } +#' Get Beta step probabilities +#' +#' Use a Beta distribution to set probabilities along a path +#' +#' @param steps Number of steps +#' @param alpha Alpha parameter +#' @param beta Beta parameter +#' +#' @details +#' The density is sampled from a Beta distribution between 0 and 1. Infinite +#' densities at edges are adjusted and then the values are scaled to give +#' probabilities. +#' +#' @importFrom stats dbeta getBetaStepProbs <- function(steps, alpha, beta) { dens <- dbeta(seq(0, 1, length.out = steps), alpha, beta) @@ -532,6 +896,20 @@ getBetaStepProbs <- function(steps, alpha, beta) { return(probs) } +#' Sample density +#' +#' Sample from a density object using rejection sampling +#' +#' @param n Number of values to sample +#' @param dens Density object to sample from +#' @param lower Lower x-axis bound on sampled values +#' +#' @details +#' Random points (x and y) are generated inside the range of the density object. +#' If the y value is less than the density for that x value (and x is greater +#' than \code{lower}) then that x value is retained. Ten thousand points are +#' generated at a time until enough valid values have been sampled.
+#' #' @importFrom stats approxfun sampleDensity <- function(n, dens, lower = 0) { diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index 7fb9c84b5127918365e967b4d1418994a24cd095..81d418b5bc0f401ef7e5f3866a4ea7860661974d 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -57,7 +57,7 @@ The Splotch simulation uses the following parameters: \describe{ \item{\code{[paths.programs]}}{Number of expression programs.} \item{\code{[paths.design]}}{data.frame describing path - structure.} + structure. See \code{\link{splotchSimPaths}} for details.} } } \item{\emph{Library size parameters}}{ @@ -78,7 +78,7 @@ The Splotch simulation uses the following parameters: \item{\emph{Design parameters}}{ \describe{ \item{\code{[cells.design]}}{data.frame describing cell - structure.} + structure. See \code{\link{splotchSimCellMeans}} for details.} } } \item{\emph{Doublet parameters}}{ diff --git a/man/getBetaStepProbs.Rd b/man/getBetaStepProbs.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f80354d235918c95dcd04929936dc8652db2d574 --- /dev/null +++ b/man/getBetaStepProbs.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{getBetaStepProbs} +\alias{getBetaStepProbs} +\title{Get Beta step probabilities} +\usage{ +getBetaStepProbs(steps, alpha, beta) +} +\arguments{ +\item{steps}{Number of steps} + +\item{alpha}{Alpha parameter} + +\item{beta}{Beta parameter} +} +\description{ +Use a Beta distribution for set probabilities along a path +} +\details{ +The density is sampled from a Beta distribution between 0 and 1. Infinite +densities at edges are adjusted and then the values are scaled to give +probabilities. 
+} diff --git a/man/sampleDensity.Rd b/man/sampleDensity.Rd new file mode 100644 index 0000000000000000000000000000000000000000..e9473b000978679c9fd2fbcbd42197ac660f80bd --- /dev/null +++ b/man/sampleDensity.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{sampleDensity} +\alias{sampleDensity} +\title{Sample density} +\usage{ +sampleDensity(n, dens, lower = 0) +} +\arguments{ +\item{n}{Number of values to sample} + +\item{dens}{Density object to sample from} + +\item{lower}{Lower x-axis bound on sampled values} +} +\description{ +Sample from a density objet using rejection sampling +} +\details{ +Random points (x and y) are generated inside the range of the density object. +If they value is less than the density for that x value (and x is greater +than \code{lower}) then that x value is retained. Ten thousand points are +generated at a time until enough valid values have been sampled. +} diff --git a/man/splotchGenNetwork.Rd b/man/splotchGenNetwork.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f0fab8841cff1ae771417e1c33a954f87b4981ba --- /dev/null +++ b/man/splotchGenNetwork.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchGenNetwork} +\alias{splotchGenNetwork} +\title{Generate Splotch gene network} +\usage{ +splotchGenNetwork(params, verbose) +} +\arguments{ +\item{params}{SplotchParams object containing simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SplotchParams object with gene network +} +\description{ +Generate a gene network for the Splotch simulation +} +\details{ +Currently a very simple approach is used which needs to be improved. A +network is generated using the \code{\link[igraph]{sample_forestfire}} +function and edge weights are sampled from a standard normal distribution. 
+} diff --git a/man/splotchSample.Rd b/man/splotchSample.Rd new file mode 100644 index 0000000000000000000000000000000000000000..610281efb18d443f67543ea81cd87d0e030a9b05 --- /dev/null +++ b/man/splotchSample.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSample} +\alias{splotchSample} +\title{Splotch sample} +\usage{ +splotchSample(params, verbose = TRUE) +} +\arguments{ +\item{params}{SplotchParams object containing simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SingleCellExperiment object containing the simulated counts and +intermediate values. +} +\description{ +Sample cells for the Splotch simulation +} +\details{ +The second stage is a two-step Splotch simulation is to generate cells based +on a complete \code{\link{SplotchParams}} object. +intermediate parameters. + +The sampling process involves the following steps: +\enumerate{ + \item Simulate library sizes for each cell + \item Simulate means for each cell + \item Simulate endogenous counts for each cell + \item Simulate ambient counts for each cell + \item Simulate final counts for each cell +} + +The final output is a +\code{\link[SingleCellExperiment]{SingleCellExperiment}} object that +contains the simulated counts but also the values for various intermediate +steps. These are stored in the \code{\link{colData}} (for cell specific +information), \code{\link{rowData}} (for gene specific information) or +\code{\link{assays}} (for gene by cell matrices) slots. 
This additional +information includes: +\describe{ + \item{\code{colData}}{ + \describe{ + \item{Cell}{Unique cell identifier.} + \item{Type}{Whether the cell is a Cell, Doublet or Empty.} + \item{CellLibSize}{The expected number of endogenous counts for + that cell.} + \item{AmbientLibSize}{The expected number of ambient counts for + that cell.} + \item{Path}{The path the cell belongs to.} + \item{Step}{How far along the path each cell is.} + \item{Path1}{For doublets the path of the first partner in the + doublet (otherwise \code{NA}).} + \item{Step1}{For doublets the step of the first partner in the + doublet (otherwise \code{NA}).} + \item{Path2}{For doublets the path of the second partner in the + doublet (otherwise \code{NA}).} + \item{Step2}{For doublets the step of the second partner in the + doublet (otherwise \code{NA}).} + } + } + \item{\code{rowData}}{ + \describe{ + \item{Gene}{Unique gene identifier.} + \item{BaseMean}{The base expression level for that gene.} + \item{AmbientMean}{The ambient expression level for that gene.} + } + } + \item{\code{assays}}{ + \describe{ + \item{CellMeans}{The mean expression of genes in each cell + after any differential expression and adjusted for expected + library size.} + \item{CellCounts}{Endogenous count matrix.} + \item{AmbientCounts}{Ambient count matrix.} + \item{counts}{Final count matrix.} + } + } +} + +Values that have been added by Splatter are named using \code{UpperCamelCase} +in order to differentiate them from the values added by analysis packages +which typically use \code{underscore_naming}. 
+} +\examples{ +params <- splotchSetup() +sim <- splotchSample(params) +} +\seealso{ +\code{\link{splotchSimLibSizes}}, \code{\link{splotchSimCellMeans}}, +\code{\link{splotchSimCellCounts}}, \code{\link{splotchSimAmbientCounts}}, +\code{\link{splotchSimCounts}} +} diff --git a/man/splotchSelectRegs.Rd b/man/splotchSelectRegs.Rd new file mode 100644 index 0000000000000000000000000000000000000000..106946c3f49a6775cd845368192a074b5f6f74a9 --- /dev/null +++ b/man/splotchSelectRegs.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSelectRegs} +\alias{splotchSelectRegs} +\title{Select Splotch regulators} +\usage{ +splotchSelectRegs(params, verbose) +} +\arguments{ +\item{params}{SplotchParams object containing simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SplotchParams object with gene regulators +} +\description{ +Select regulator genes in the gene network for a Splotch simulation +} +\details{ +Regulators are randomly selected, weighted according to the difference +between their out degree and in degree. This is an arbitary weighting and +may be improved or replace in the future. +} diff --git a/man/splotchSetup.Rd b/man/splotchSetup.Rd new file mode 100644 index 0000000000000000000000000000000000000000..6017de52b87a3b848d3a3afcea333995d41810ed --- /dev/null +++ b/man/splotchSetup.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSetup} +\alias{splotchSetup} +\title{Splotch setup} +\usage{ +splotchSetup(params = newSplotchParams(), verbose = TRUE, ...) +} +\arguments{ +\item{params}{SplotchParams object containing simulation parameters.} + +\item{verbose}{logical. 
Whether to print progress messages} + +\item{...}{any additional parameter settings to override what is provided in +\code{params}.} +} +\value{ +A complete SplotchParams object +} +\description{ +Setup the parameters required for the Splotch simulation +} +\details{ +The first stage is a two-step Splotch simulation is to generate some of the +intermediate parameters. The resulting parameters allow multiple simulated +datasets to be generated from the same biological structure (using +\code{\link{splotchSample}}). As with all the other parameters these values +can be manually overwritten if desired. + +The setup involves the following steps: +\enumerate{ + \item Generate a gene network (if not already present) + \item Select regulator genes (if not already present) + \item Simulate gene means (if not already present) + \item Simulate cell paths +} + +The resulting \code{\link{SplotchParams}} object will have the following +parameters set (if they weren't already). + +\itemize{ + \item \code{mean.values} + \item \code{network.graph} + \item \code{network.regsSet} + \item \code{paths.means} +} + +See \code{\link{SplotchParams}} for more details about these parameters and +the functions for the individual steps for more details about the process. 
+} +\examples{ +params <- splotchSetup() +} +\seealso{ +\code{\link{splotchGenNetwork}}, \code{\link{splotchSelectRegs}}, +\code{\link{splotchSimGeneMeans}}, \code{\link{splotchSimPaths}}, +\code{\link{SplotchParams}} +} diff --git a/man/splotchSimAmbientCounts.Rd b/man/splotchSimAmbientCounts.Rd new file mode 100644 index 0000000000000000000000000000000000000000..645e2f5d65cabcd06241a07eb452dd69ea7fedfe --- /dev/null +++ b/man/splotchSimAmbientCounts.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSimAmbientCounts} +\alias{splotchSimAmbientCounts} +\title{Simulate Splotch ambient counts} +\usage{ +splotchSimAmbientCounts(sim, params, verbose) +} +\arguments{ +\item{sim}{SingleCellExperiment containing simulation.} + +\item{params}{SplotParams object with simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SingleCellExperiment with ambient counts +} +\description{ +Simulate Splotch ambient counts +} +\details{ +The overall expression profile to calculated by averaging the cell counts +of the (non-empty) cells. This is then multiplied by the ambient library +sizes to get a mean for each cell. Counts are then sampled from a Poisson +distribution using these means. +} diff --git a/man/splotchSimCellCounts.Rd b/man/splotchSimCellCounts.Rd new file mode 100644 index 0000000000000000000000000000000000000000..70a9d0b55107094647038002e4a09edcadf25b9a --- /dev/null +++ b/man/splotchSimCellCounts.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSimCellCounts} +\alias{splotchSimCellCounts} +\title{Simulate Splotch cell counts} +\usage{ +splotchSimCellCounts(sim, params, verbose) +} +\arguments{ +\item{sim}{SingleCellExperiment containing simulation.} + +\item{params}{SplotParams object with simulation parameters.} + +\item{verbose}{logical. 
Whether to print progress messages} +} +\value{ +SingleCellExperiment with cell counts +} +\description{ +Simulate cell counts for the Splotch simulation +} +\details{ +Counts are sampled from a Poisson distribution with lambda equal to the +cell means matrix. +} diff --git a/man/splotchSimCellMeans.Rd b/man/splotchSimCellMeans.Rd new file mode 100644 index 0000000000000000000000000000000000000000..0d5f87c5af74a7be89112a4fda94e4a1a11881e4 --- /dev/null +++ b/man/splotchSimCellMeans.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSimCellMeans} +\alias{splotchSimCellMeans} +\title{Simulate Splotch cell means} +\usage{ +splotchSimCellMeans(sim, params, verbose) +} +\arguments{ +\item{sim}{SingleCellExperiment containing simulation.} + +\item{params}{SplotchParams object with simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SingleCellExperiment with cell means +} +\description{ +Simulate endogenous counts for each cell in a Splotch simulation +} +\details{ +Cells are first assigned to a path and a step along that path. This is +controlled by the \code{cells.design} parameter which is a \code{data.frame} +with the columns "Path", "Probability", "Alpha" and "Beta". The Path field +is an ID for each path and the Probability field is the probability that a +cell will come from that path (must sum to 1). The Alpha and Beta parameters +control the density of cells along the path. After they are assigned to paths +the step for each cell is sampled from a Beta distribution with parameters +shape1 equals Alpha and shape2 equals Beta. This approach is very flexible +and allows almost any distribution of cells along a path. The distribution +can be viewed using \code{hist(rbeta(10000, Alpha, Beta), breaks = 100)}. 
+Some useful combinations of parameters are: + +\describe{ + \item{\code{Alpha = 1}, \code{Beta = 1}}{Uniform distribution along the + path} + \item{\code{Alpha = 0}, \code{Beta = 1}}{All cells at the start of the + path.} + \item{\code{Alpha = 1}, \code{Beta = 0}}{All cells at the end of the + path.} + \item{\code{Alpha = 0}, \code{Beta = 0}}{Cells only at each end of the + path.} + \item{\code{Alpha = 1}, \code{Beta = 2}}{Linear skew towards the start + of the path} + \item{\code{Alpha = 0.5}, \code{Beta = 1}}{Curved skew towards the start + of the path} + \item{\code{Alpha = 2}, \code{Beta = 1}}{Linear skew towards the end + of the path} + \item{\code{Alpha = 1}, \code{Beta = 0.5}}{Curved skew towards the end + of the path} + \item{\code{Alpha = 0.5}, \code{Beta = 0.5}}{Curved skew towards both + ends of the path} + \item{\code{Alpha = 2}, \code{Beta = 2}}{Curved skew away from both + ends of the path} +} + +Once cells are assigned to paths and steps the correct means are extracted +from the \code{paths.means} parameter and adjusted based on each cell's +library size. An adjustment for BCV is then applied. Doublets are also +simulated at this stage by selecting two path/step combinations and averaging +the means. +} diff --git a/man/splotchSimCounts.Rd b/man/splotchSimCounts.Rd new file mode 100644 index 0000000000000000000000000000000000000000..562448fe8fb7059dd749bccfcd17b693c8daa62a --- /dev/null +++ b/man/splotchSimCounts.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSimCounts} +\alias{splotchSimCounts} +\title{Simulate Splotch final counts} +\usage{ +splotchSimCounts(sim, params, verbose) +} +\arguments{ +\item{sim}{SingleCellExperiment containing simulation.} + +\item{params}{SplotchParams object with simulation parameters.} + +\item{verbose}{logical. 
Whether to print progress messages} +} +\value{ +SingleCellExperiment with counts matrix +} +\description{ +Simulate the final counts matrix for a Splotch simulation +} +\details{ +The cell counts matrix and ambient counts matrix are added together. The +result is then downsampled to the cell library size (for cells and doublets) +or the ambient library size (for empty cells) using the +\code{\link[DropletUtils]{downsampleMatrix}} function. +} +\seealso{ +\code{\link[DropletUtils]{downsampleMatrix}} +} diff --git a/man/splotchSimGeneMeans.Rd b/man/splotchSimGeneMeans.Rd new file mode 100644 index 0000000000000000000000000000000000000000..0a24b63a04f2239d815638262105e1bdd6e42aa3 --- /dev/null +++ b/man/splotchSimGeneMeans.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSimGeneMeans} +\alias{splotchSimGeneMeans} +\title{Simulate Splotch gene means} +\usage{ +splotchSimGeneMeans(params, verbose) +} +\arguments{ +\item{params}{SplotchParams object containing simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SplotchParams object with gene means +} +\description{ +Simulate Splotch gene means +} +\details{ +Gene means are simulated in one of two ways depending on the value of the +\code{mean.method} parameter. + +If \code{mean.method} is "fit" (default) then means are sampled from a Gamma +distribution with shape equals \code{mean.shape} and rate equals +\code{mean.rate}. Expression outliers are then added by replacing some +values with the median multiplied by a factor from a log-normal distribution. +This is the same process used for the Splat simulation. + +If \code{mean.method} is "density" then means are sampled from the +density object in the \code{mean.density} parameter using a rejection +sampling method. This approach is more flexible but may violate some +statistical assumptions. 
+} diff --git a/man/splotchSimLibSizes.Rd b/man/splotchSimLibSizes.Rd new file mode 100644 index 0000000000000000000000000000000000000000..408e5096bd56d710e4d99e8d605b61de6a5943c7 --- /dev/null +++ b/man/splotchSimLibSizes.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSimLibSizes} +\alias{splotchSimLibSizes} +\title{Simulate Splotch library sizes} +\usage{ +splotchSimLibSizes(sim, params, verbose) +} +\arguments{ +\item{sim}{SingleCellExperiment containing simulation.} + +\item{params}{SplotchParams object with simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SingleCellExperiment with library sizes +} +\description{ +Generate library sizes for cells in the Splotch simulation +} +\details{ +Library sizes are simulated in one of two ways depending on the value of the +\code{lib.method} parameter. + +If \code{lib.method} is "fit" (default) then library sizes are sampled from a +log-normal distribution with meanlog equals \code{lib.loc} and sdlog equals +\code{lib.scale}. + +If \code{lib.method} is "density" then library sizes are sampled from the +density object in the \code{lib.density} parameter using a rejection +sampling method. This approach is more flexible but may violate some +statistical assumptions. + +Ambient library sizes are also generated from a log-normal distribution based +on the parameters for the cell library size and adjusted using the +\code{ambient.scale} parameter. 
+} diff --git a/man/splotchSimPaths.Rd b/man/splotchSimPaths.Rd new file mode 100644 index 0000000000000000000000000000000000000000..83d4d7dcc282d8af8495a4cc927e11764bdede18 --- /dev/null +++ b/man/splotchSimPaths.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-simulate.R +\name{splotchSimPaths} +\alias{splotchSimPaths} +\title{Simulate Splotch paths} +\usage{ +splotchSimPaths(params, verbose) +} +\arguments{ +\item{params}{SplotchParams object containing simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SplotchParams object with path means +} +\description{ +Simulate gene means for each step along each path of a Splotch simulation +} +\details{ +The method of simulating paths is inspired by the method used in the PROSSTT +simulation. Changes in expression are controlled by \code{paths.nPrograms} +regulatory programs. Each of the regulatory genes in the gene network has +some association with each program. This is analogous to there being changes +in the environment (the programs) which are sensed by receptors (regulatory +genes) and cause changes in expression downstream. For each path a random +walk is generated for each program and the changes passed on to the +regulatory genes. At each step the changes propagate through the network +according to the weights on edges between genes. This algorithm is fairly +simple but should result in correlation relationships between genes. However +it is likely to be improved and adjusted in the future. + +The path structure itself is specified by the \code{paths.design} parameter. +This is a \code{data.frame} with three columns: "Path", "From", and "Steps". +The Path field is an ID for each path while the Steps field controls the +length of each path. Increasing the number of steps will increase the +difference in expression between the ends of the paths. The From field sets +the originating point of each path. 
For example a From of \code{0, 0, 0} +would indicate three paths from the origin while a From of \code{0, 1, 1} +would give a branching structure with Path 1 beginning at the origin and +Path 2 and Path 3 beginning at the end of Path 1. +} +\references{ +Papadopoulos N, Parra RG, Söding J. PROSSTT: probabilistic simulation of +single-cell RNA-seq data for complex differentiation processes. +Bioinformatics (2019). \url{https://doi.org/10.1093/bioinformatics/btz078}. +} diff --git a/man/splotchSimulate.Rd b/man/splotchSimulate.Rd index 19458d9ce7120ea4ec2bc9e6f715775705025120..de50559db2c4b93c8a4d6686a0989fff20827bf5 100644 --- a/man/splotchSimulate.Rd +++ b/man/splotchSimulate.Rd @@ -15,15 +15,22 @@ splotchSimulate(params = newSplotchParams(), verbose = TRUE, ...) \code{params}.} } \value{ -SingleCellExperiment containing simulated counts +SingleCellExperiment containing simulated counts and intermediate +values } \description{ -Simulate counts from... +Simulate scRNA-seq count data using the Splotch model } \details{ -Details... +This function is for simulating data in a single step. It consists of a +call to \code{\link{splotchSetup}} followed by a call to +\code{\link{splotchSample}}. Please see the documentation for those functions +for more details of the individual steps. } \examples{ sim <- splotchSimulate() } +\seealso{ +\code{\link{splotchSetup}}, \code{\link{splotchSample}} +}