Skip to content
Snippets Groups Projects
Commit 0b633ea0 authored by Luke Zappia's avatar Luke Zappia
Browse files

Merge branch 'mfa'

* mfa:
  Add mfa tests
  Add mfa to lists
  Add mfaSimulate
  Add mfaEstimate
  Add MFAParams
parents b90aea28 114a5aad
No related branches found
No related tags found
No related merge requests found
......@@ -51,7 +51,8 @@ Suggests:
rmarkdown,
S4Vectors,
scDD,
scran
scran,
mfa
biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing,
Software
URL: https://github.com/Oshlack/splatter
......
......@@ -4,6 +4,8 @@ S3method(lun2Estimate,SingleCellExperiment)
S3method(lun2Estimate,matrix)
S3method(lunEstimate,SingleCellExperiment)
S3method(lunEstimate,matrix)
S3method(mfaEstimate,SingleCellExperiment)
S3method(mfaEstimate,matrix)
S3method(scDDEstimate,SingleCellExperiment)
S3method(scDDEstimate,default)
S3method(scDDEstimate,matrix)
......@@ -24,8 +26,11 @@ export(lunSimulate)
export(makeCompPanel)
export(makeDiffPanel)
export(makeOverallPanel)
export(mfaEstimate)
export(mfaSimulate)
export(newLun2Params)
export(newLunParams)
export(newMFAParams)
export(newSCDDParams)
export(newSimpleParams)
export(newSplatParams)
......@@ -43,6 +48,7 @@ export(splatSimulateSingle)
export(summariseDiff)
exportClasses(Lun2Params)
exportClasses(LunParams)
exportClasses(MFAParams)
exportClasses(SCDDParams)
exportClasses(SimpleParams)
exportClasses(SplatParams)
......
......@@ -7,12 +7,12 @@
#' The Params class defines the following parameters:
#'
#' \describe{
#' \item{\code{[nGenes]}}{The number of genes to simulate.}
#' \item{\code{[nCells]}}{The number of cells to simulate.}
#' \item{\code{seed}}{Seed to use for generating random numbers.}
#' \item{\code{nGenes}}{The number of genes to simulate.}
#' \item{\code{nCells}}{The number of cells to simulate.}
#' \item{\code{[seed]}}{Seed to use for generating random numbers.}
#' }
#'
#' The parameters shown in brackets can be estimated from real data.
#' The parameters not shown in brackets can be estimated from real data.
#'
#' @name Params
#' @rdname Params
......@@ -460,3 +460,43 @@ setClass("SCDDParams",
modeFC = c(2, 3, 4),
varInflation = c(1, 1),
condition = "condition"))
#' The MFAParams class
#'
#' S4 class that holds parameters for the mfa simulation.
#'
#' @section Parameters:
#'
#' The mfa simulation uses the following parameters:
#'
#' \describe{
#' \item{\code{nGenes}}{The number of genes to simulate.}
#' \item{\code{nCells}}{The number of cells to simulate.}
#' \item{\code{[seed]}}{Seed to use for generating random numbers.}
#' \item{\code{[trans.prop]}}{Proportion of genes that show transient
#' expression. These genes are briefly up or down-regulated before returning
#' to their initial state.}
#' \item{\code{[zero.neg]}}{Logical. Whether to set negative expression
#' values to zero. This will zero-inflate the data.}
#' \item{\code{[dropout.present]}}{Logical. Whether to simulate dropout.}
#' \item{\code{dropout.lambda}}{Lambda parameter for the exponential
#' dropout function.}
#' }
#'
#' The parameters not shown in brackets can be estimated from real data using
#' \code{\link{mfaEstimate}}. See \code{\link[mfa]{create_synthetic}} for more
#' details about the parameters. For details of the Splatter implementation of
#' the mfa simulation see \code{\link{mfaSimulate}}.
#'
#' @name MFAParams
#' @rdname MFAParams
#' @aliases MFAParams-class
#' @exportClass MFAParams
setClass(
    "MFAParams",
    # Inherits the slots of the Params class (read below as nGenes, nCells
    # and seed by the validity method)
    contains = "Params",
    # Additional slots specific to the mfa simulation
    slots = c(
        trans.prop = "numeric",
        zero.neg = "logical",
        dropout.present = "logical",
        dropout.lambda = "numeric"
    ),
    # Default values for the mfa-specific slots
    prototype = prototype(
        trans.prop = 0,
        zero.neg = TRUE,
        dropout.present = FALSE,
        dropout.lambda = 1
    )
)
#' @rdname newParams
#' @importFrom methods new
#' @export
newMFAParams <- function(...) {

    # mfa is an optional dependency, so fail early with a clear message when
    # it is not installed
    mfa.available <- requireNamespace("mfa", quietly = TRUE)
    if (!mfa.available) {
        stop("The mfa simulation requires the 'mfa' package.")
    }

    # Start from the class prototype and apply any user supplied settings
    params <- setParams(new("MFAParams"), ...)

    params
}
# Validity method for MFAParams objects. Each parameter is checked with
# checkmate; the result is TRUE when every check passes, otherwise a character
# vector with one "<parameter>: <message>" entry per failed check.
setValidity("MFAParams", function(object) {

    values <- getParams(object, slotNames(object))

    # Each check evaluates to TRUE on success or to a message string on failure
    results <- c(
        nGenes = checkmate::checkInt(values$nGenes, lower = 1),
        nCells = checkmate::checkInt(values$nCells, lower = 1),
        trans.prop = checkmate::checkNumber(values$trans.prop, lower = 0,
                                            upper = 1),
        zero.neg = checkmate::checkLogical(values$zero.neg,
                                           any.missing = FALSE, len = 1),
        dropout.present = checkmate::checkLogical(values$dropout.present,
                                                  any.missing = FALSE,
                                                  len = 1),
        dropout.lambda = checkmate::checkNumber(values$dropout.lambda),
        seed = checkmate::checkInt(values$seed, lower = 0)
    )

    failed <- results != TRUE

    if (!any(failed)) {
        return(TRUE)
    }

    # Report only the failures, prefixed with the parameter name
    paste(names(results)[failed], results[failed], sep = ": ")
})
# show method for MFAParams objects. The inherited show method runs first,
# then the mfa-specific parameters are passed to showPP() grouped by heading.
setMethod("show", "MFAParams", function(object) {

    # Display label -> parameter name, one vector per printed heading
    transient <- c("[Proportion]" = "trans.prop")
    negative <- c("[Zero]" = "zero.neg")
    dropout <- c("[Present]" = "dropout.present",
                 "(Lambda)" = "dropout.lambda")

    callNextMethod()

    showPP(object, list("Transient:" = transient,
                        "Negative:" = negative,
                        "Dropout:" = dropout))
})
......@@ -15,7 +15,8 @@ listSims <- function(print = TRUE) {
sims <- list(c("Splat", "splat", "", "",
"The Splat simulation generates means from a gamma
distribution, adjusts them for BCV and generates counts from
a gamma-poisson. Dropout can be optionally added."),
a gamma-poisson. Dropout and batch effects can be optionally
added."),
c("Splat Single", "splatSingle", "", "",
"The Splat simulation with a single population."),
c("Splat Groups", "splatGroups", "", "",
......@@ -46,12 +47,17 @@ listSims <- function(print = TRUE) {
"kdkorthauer/scDD",
"The scDD simulation samples a given dataset and can
simulate differentially expressed and differentially
distributed genes between two conditions."))
distributed genes between two conditions."),
c("mfa", "mfa", "10.12688/wellcomeopenres.11087.1",
"kieranrcampbell/mfa",
"The mfa simulation produces a bifurcating pseudotime
trajectory. This can optionally include genes with transient
changes in expression and added dropout."))
sims.table <- data.frame(Name = rep(NA, length(sims)),
Prefix = rep(NA, length(sims)),
DOI = rep(NA, length(sims)),
Github = rep(NA, length(sims)),
GitHub = rep(NA, length(sims)),
Description = rep(NA, length(sims)))
for (idx in seq_along(sims)) {
......@@ -65,7 +71,7 @@ listSims <- function(print = TRUE) {
for (idx in seq_len(nrow(sims.table))) {
sim <- as.character(sims.table[idx, ])
cat(sim[1], paste0("(", sim[2], ")"), "\n")
cat("DOI:", sim[3], "\t", "Github:", sim[4], "\n")
cat("DOI:", sim[3], "\t", "GitHub:", sim[4], "\n")
cat(sim[5], "\n\n")
}
}
......
#' Estimate mfa simulation parameters
#'
#' Estimate simulation parameters for the mfa simulation from a real dataset.
#'
#' @param counts either a counts matrix or a SingleCellExperiment object
#' containing count data to estimate parameters from.
#' @param params MFAParams object to store estimated values in.
#'
#' @details
#' The \code{nGenes} and \code{nCells} parameters are taken from the size of the
#' input data (the number of rows and columns respectively). The dropout lambda
#' parameter is estimated using \code{\link[mfa]{empirical_lambda}}. See
#' \code{\link{MFAParams}} for more details on the parameters.
#'
#' @return MFAParams object containing the estimated parameters.
#'
#' @examples
#' data("sc_example_counts")
#' params <- mfaEstimate(sc_example_counts)
#' params
#' @export
mfaEstimate <- function(counts, params = newMFAParams()) {
# S3 generic: dispatch on the class of counts
UseMethod("mfaEstimate")
}
#' @rdname mfaEstimate
#' @export
mfaEstimate.SingleCellExperiment <- function(counts,
                                             params = newMFAParams()) {
    # Extract the counts from the object and dispatch again on the result
    count.data <- BiocGenerics::counts(counts)
    mfaEstimate(count.data, params)
}
#' @rdname mfaEstimate
#' @export
mfaEstimate.matrix <- function(counts, params = newMFAParams()) {

    checkmate::assertClass(params, "MFAParams")

    # counts has genes in rows and cells in columns; it is transposed before
    # being handed to mfa
    lambda.est <- mfa::empirical_lambda(t(counts))

    # Dataset dimensions give the gene and cell numbers
    params <- setParams(params,
                        nGenes = nrow(counts),
                        nCells = ncol(counts),
                        dropout.lambda = lambda.est)

    params
}
#' MFA simulation
#'
#' Simulate a bifurcating pseudotime path using the mfa method.
#'
#' @param params MFAParams object containing simulation parameters.
#' @param verbose Logical. Whether to print progress messages.
#' @param ... any additional parameter settings to override what is provided in
#' \code{params}.
#'
#' @details
#' This function is just a wrapper around \code{\link[mfa]{create_synthetic}}
#' that takes a \code{\link{MFAParams}}, runs the simulation then converts the
#' output to a \code{\link[SingleCellExperiment]{SingleCellExperiment}} object.
#' See \code{\link[mfa]{create_synthetic}} and the mfa paper for more details
#' about how the simulation works.
#'
#' @return SingleCellExperiment containing simulated counts
#'
#' @references
#' Campbell KR, Yau C. Probabilistic modeling of bifurcations in single-cell
#' gene expression data using a Bayesian mixture of factor analyzers. Wellcome
#' Open Research (2017).
#'
#' Paper: \url{https://doi.org/10.12688/wellcomeopenres.11087.1}
#'
#' Code: \url{https://github.com/kieranrcampbell/mfa}
#'
#' @examples
#' sim <- mfaSimulate()
#' @export
mfaSimulate <- function(params = newMFAParams(), verbose = TRUE, ...) {

    checkmate::assertClass(params, "MFAParams")
    # Apply any overrides supplied through ...
    params <- setParams(params, ...)

    # Seed the random number generator from the parameters
    set.seed(getParam(params, "seed"))

    # Collect the values that are passed on to mfa
    n.cells <- getParam(params, "nCells")
    n.genes <- getParam(params, "nGenes")
    prop.transient <- getParam(params, "trans.prop")
    zero.negative <- getParam(params, "zero.neg")
    use.dropout <- getParam(params, "dropout.present")
    lambda <- getParam(params, "dropout.lambda")

    if (verbose) {
        message("Simulating counts...")
    }
    synthetic <- mfa::create_synthetic(C = n.cells,
                                       G = n.genes,
                                       p_transient = prop.transient,
                                       zero_negative = zero.negative,
                                       model_dropout = use.dropout,
                                       lambda = lambda)

    if (verbose) {
        message("Creating final dataset...")
    }
    cell.ids <- paste0("Cell", seq_len(n.cells))
    gene.ids <- paste0("Gene", seq_len(n.genes))

    # Transpose the simulated matrix so that genes are rows and cells are
    # columns, then label both dimensions
    counts <- t(synthetic$X)
    rownames(counts) <- gene.ids
    colnames(counts) <- cell.ids

    # Per-cell annotation: branch assignment and pseudotime from mfa
    cells <- data.frame(Cell = cell.ids,
                        Branch = synthetic$branch,
                        Pseudotime = synthetic$pst,
                        row.names = cell.ids)

    # Per-gene annotation: the k, phi and delta values for each of the two
    # branches
    features <- data.frame(Gene = gene.ids,
                           KBranch1 = synthetic$k[, 1],
                           KBranch2 = synthetic$k[, 2],
                           PhiBranch1 = synthetic$phi[, 1],
                           PhiBranch2 = synthetic$phi[, 2],
                           DeltaBranch1 = synthetic$delta[, 1],
                           DeltaBranch2 = synthetic$delta[, 2],
                           row.names = gene.ids)

    # The parameters used are stored in the metadata of the returned object
    SingleCellExperiment(assays = list(counts = counts),
                         rowData = features,
                         colData = cells,
                         metadata = list(params = params))
}
......@@ -12,6 +12,7 @@ reference:
- '`getParams`'
- '`Lun2Params`'
- '`LunParams`'
- '`MFAParams`'
- '`newParams`'
- '`Params`'
- '`SCDDParams`'
......@@ -24,6 +25,7 @@ reference:
contents:
- '`lun2Estimate`'
- '`lunEstimate`'
- '`mfaEstimate`'
- '`scDDEstimate`'
- '`simpleEstimate`'
- '`splatEstBCV`'
......@@ -37,6 +39,7 @@ reference:
contents:
- '`lun2Simulate`'
- '`lunSimulate`'
- '`mfaSimulate`'
- '`scDDSimulate`'
- '`simpleSimulate`'
- '`splatSimBatchCellMeans`'
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllClasses.R
\docType{class}
\name{MFAParams}
\alias{MFAParams}
\alias{MFAParams-class}
\title{The MFAParams class}
\description{
S4 class that holds parameters for the mfa simulation.
}
\section{Parameters}{
The mfa simulation uses the following parameters:
\describe{
\item{\code{nGenes}}{The number of genes to simulate.}
\item{\code{nCells}}{The number of cells to simulate.}
\item{\code{[seed]}}{Seed to use for generating random numbers.}
\item{\code{[trans.prop]}}{Proportion of genes that show transient
expression. These genes are briefly up or down-regulated before returning
to their initial state.}
\item{\code{[zero.neg]}}{Logical. Whether to set negative expression
values to zero. This will zero-inflate the data.}
\item{\code{[dropout.present]}}{Logical. Whether to simulate dropout.}
\item{\code{dropout.lambda}}{Lambda parameter for the exponential
dropout function.}
}
The parameters not shown in brackets can be estimated from real data using
\code{\link{mfaEstimate}}. See \code{\link[mfa]{create_synthetic}} for more
details about the parameters. For details of the Splatter implementation of
the mfa simulation see \code{\link{mfaSimulate}}.
}
......@@ -14,11 +14,11 @@ Virtual S4 class that all other Params classes inherit from.
The Params class defines the following parameters:
\describe{
\item{\code{[nGenes]}}{The number of genes to simulate.}
\item{\code{[nCells]}}{The number of cells to simulate.}
\item{\code{seed}}{Seed to use for generating random numbers.}
\item{\code{nGenes}}{The number of genes to simulate.}
\item{\code{nCells}}{The number of cells to simulate.}
\item{\code{[seed]}}{Seed to use for generating random numbers.}
}
The parameters shown in brackets can be estimated from real data.
The parameters not shown in brackets can be estimated from real data.
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mfa-estimate.R
\name{mfaEstimate}
\alias{mfaEstimate}
\alias{mfaEstimate.SingleCellExperiment}
\alias{mfaEstimate.matrix}
\title{Estimate mfa simulation parameters}
\usage{
mfaEstimate(counts, params = newMFAParams())
\method{mfaEstimate}{SingleCellExperiment}(counts, params = newMFAParams())
\method{mfaEstimate}{matrix}(counts, params = newMFAParams())
}
\arguments{
\item{counts}{either a counts matrix or a SingleCellExperiment object
containing count data to estimate parameters from.}
\item{params}{MFAParams object to store estimated values in.}
}
\value{
MFAParams object containing the estimated parameters.
}
\description{
Estimate simulation parameters for the mfa simulation from a real dataset.
}
\details{
The \code{nGenes} and \code{nCells} parameters are taken from the size of the
input data. The dropout lambda parameter is estimated using
\code{\link[mfa]{empirical_lambda}}. See \code{\link{MFAParams}} for more
details on the parameters.
}
\examples{
data("sc_example_counts")
params <- mfaEstimate(sc_example_counts)
params
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mfa-simulate.R
\name{mfaSimulate}
\alias{mfaSimulate}
\title{MFA simulation}
\usage{
mfaSimulate(params = newMFAParams(), verbose = TRUE, ...)
}
\arguments{
\item{params}{MFAParams object containing simulation parameters.}
\item{verbose}{Logical. Whether to print progress messages.}
\item{...}{any additional parameter settings to override what is provided in
\code{params}.}
}
\value{
SingleCellExperiment containing simulated counts
}
\description{
Simulate a bifurcating pseudotime path using the mfa method.
}
\details{
This function is just a wrapper around \code{\link[mfa]{create_synthetic}}
that takes a \code{\link{MFAParams}}, runs the simulation then converts the
output to a \code{\link[SingleCellExperiment]{SingleCellExperiment}} object.
See \code{\link[mfa]{create_synthetic}} and the mfa paper for more details
about how the simulation works.
}
\examples{
sim <- mfaSimulate()
}
\references{
Campbell KR, Yau C. Probabilistic modeling of bifurcations in single-cell
gene expression data using a Bayesian mixture of factor analyzers. Wellcome
Open Research (2017).
Paper: \url{https://doi.org/10.12688/wellcomeopenres.11087.1}
Code: \url{https://github.com/kieranrcampbell/mfa}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R,
% R/LunParams-methods.R, R/SCDDParams-methods.R, R/SimpleParams-methods.R,
% R/SplatParams-methods.R
% R/LunParams-methods.R, R/MFAParams-methods.R, R/SCDDParams-methods.R,
% R/SimpleParams-methods.R, R/SplatParams-methods.R
\name{newParams}
\alias{newParams}
\alias{newLun2Params}
\alias{newLunParams}
\alias{newMFAParams}
\alias{newSCDDParams}
\alias{newSimpleParams}
\alias{newSplatParams}
......@@ -15,6 +16,8 @@ newLun2Params(...)
newLunParams(...)
newMFAParams(...)
newSCDDParams(...)
newSimpleParams(...)
......
context("mfa simulation")

test_that("mfa simulation output is valid", {
    # mfa is only a suggested package, so skip (rather than fail) when it is
    # not installed
    skip_if_not_installed("mfa")

    # Default parameters
    expect_true(validObject(mfaSimulate()))
    # With dropout switched on
    expect_true(validObject(mfaSimulate(dropout.present = TRUE)))
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment