-
Christina Azodi authoredChristina Azodi authored
splatSimulate.Rd 5.86 KiB
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/splat-simulate.R
\name{splatSimulate}
\alias{splatSimulate}
\alias{splatSimulateSingle}
\alias{splatSimulateGroups}
\alias{splatSimulatePaths}
\title{Splat simulation}
\usage{
splatSimulate(
params = newSplatParams(),
method = c("single", "groups", "paths", "eqtl"),
eqtl = NULL,
verbose = TRUE,
...
)
splatSimulateSingle(params = newSplatParams(), verbose = TRUE, ...)
splatSimulateGroups(params = newSplatParams(), verbose = TRUE, ...)
splatSimulatePaths(params = newSplatParams(), verbose = TRUE, ...)
}
\arguments{
\item{params}{SplatParams object containing parameters for the simulation.
See \code{\link{SplatParams}} for details.}
\item{method}{which simulation method to use. Options are "single" which
produces a single population, "groups" which produces distinct groups
(eg. cell types), "paths" which selects cells from continuous
trajectories (eg. differentiation processes), or "eqtl" which produces
a single population for every sample (ie column) in the dataframe
output by splateQTL().}
\item{eqtl}{matrix of mean gene counts across the eQTL population, output
from splateQTL().}
\item{verbose}{logical. Whether to print progress messages.}
\item{...}{any additional parameter settings to override what is provided in
\code{params}.}
}
\value{
SingleCellExperiment object containing the simulated counts and
intermediate values.
}
\description{
Simulate count data from a fictional single-cell RNA-seq experiment using
the Splat method.
}
\details{
Parameters can be set in a variety of ways. If no parameters are provided
the default parameters are used. Any parameters in \code{params} can be
overridden by supplying additional arguments through a call to
\code{\link{setParams}}. This design allows the user flexibility in
how they supply parameters and allows small adjustments without creating a
new \code{SplatParams} object. See examples for a demonstration of how this
can be used.
The simulation involves the following steps:
\enumerate{
\item Set up simulation object
\item Simulate library sizes
\item Simulate gene means
\item Simulate groups/paths
\item Simulate BCV adjusted cell means
\item Simulate true counts
\item Simulate dropout
\item Create final dataset
}
The final output is a
\code{\link[SingleCellExperiment]{SingleCellExperiment}} object that
contains the simulated counts but also the values for various intermediate
steps. These are stored in the \code{\link{colData}} (for cell specific
information), \code{\link{rowData}} (for gene specific information) or
\code{\link{assays}} (for gene by cell matrices) slots. This additional
information includes:
\describe{
\item{\code{colData}}{
\describe{
\item{Cell}{Unique cell identifier.}
\item{Group}{The group or path the cell belongs to.}
\item{ExpLibSize}{The expected library size for that cell.}
\item{Step (paths only)}{how far along the path each cell is.}
}
}
\item{\code{rowData}}{
\describe{
\item{Gene}{Unique gene identifier.}
\item{BaseGeneMean}{The base expression level for that gene.}
\item{OutlierFactor}{Expression outlier factor for that gene.
Values of 1 indicate the gene is not an expression outlier.}
\item{GeneMean}{Expression level after applying outlier factors.}
\item{BatchFac[Batch]}{The batch effects factor for each gene for
a particular batch.}
\item{DEFac[Group]}{The differential expression factor for each
gene in a particular group. Values of 1 indicate the gene is not
differentially expressed.}
\item{SigmaFac[Path]}{Factor applied to genes that have
non-linear changes in expression along a path.}
}
}
\item{\code{assays}}{
\describe{
\item{BatchCellMeans}{The mean expression of genes in each cell
after adding batch effects.}
\item{BaseCellMeans}{The mean expression of genes in each cell
after any differential expression and adjusted for expected
library size.}
\item{BCV}{The Biological Coefficient of Variation for each gene
in each cell.}
\item{CellMeans}{The mean expression level of genes in each cell
adjusted for BCV.}
\item{TrueCounts}{The simulated counts before dropout.}
\item{Dropout}{Logical matrix showing which values have been
dropped in which cells.}
}
}
}
Values that have been added by Splatter are named using \code{UpperCamelCase}
in order to differentiate them from the values added by analysis packages
which typically use \code{underscore_naming}.
}
\examples{
# Simulation with default parameters
sim <- splatSimulate()
\dontrun{
# Simulation with different number of genes
sim <- splatSimulate(nGenes = 1000)
# Simulation with custom parameters
params <- newSplatParams(nGenes = 100, mean.rate = 0.5)
sim <- splatSimulate(params)
# Simulation with adjusted custom parameters
sim <- splatSimulate(params, mean.rate = 0.6, out.prob = 0.2)
# Simulate groups
sim <- splatSimulate(method = "groups")
# Simulate paths
sim <- splatSimulate(method = "paths")
# Simulate eQTL data
eqtl <- splateQTL(params=params)
sim <- splatSimulate(params=params, method = 'eqtl', eqtl = eqtl)
}
}
\references{
Zappia L, Phipson B, Oshlack A. Splatter: simulation of single-cell RNA
sequencing data. Genome Biology (2017).
Paper: \url{10.1186/s13059-017-1305-0}
Code: \url{https://github.com/Oshlack/splatter}
}
\seealso{
\code{\link{splatSimLibSizes}}, \code{\link{splatSimGeneMeans}},
\code{\link{splatSimBatchEffects}}, \code{\link{splatSimBatchCellMeans}},
\code{\link{splatSimDE}}, \code{\link{splatSimCellMeans}},
\code{\link{splatSimBCVMeans}}, \code{\link{splatSimTrueCounts}},
\code{\link{splatSimDropout}}
}