diff --git a/DESCRIPTION b/DESCRIPTION index 73a36d7df45b733557f8167cd9e1db8ff9de47d3..68a958e1298e8906a3c0f8da5ee1c9ef9eaa5e20 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,8 @@ Suggests: rmarkdown, S4Vectors, scDD, - scran + scran, + mfa biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing, Software URL: https://github.com/Oshlack/splatter diff --git a/NAMESPACE b/NAMESPACE index 16a874dc59cc293733ab91688904545878cf5ff6..5017918a15815477a8c8c0152abb2c45d72d8349 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,8 @@ S3method(lun2Estimate,SingleCellExperiment) S3method(lun2Estimate,matrix) S3method(lunEstimate,SingleCellExperiment) S3method(lunEstimate,matrix) +S3method(mfaEstimate,SingleCellExperiment) +S3method(mfaEstimate,matrix) S3method(scDDEstimate,SingleCellExperiment) S3method(scDDEstimate,default) S3method(scDDEstimate,matrix) @@ -24,8 +26,11 @@ export(lunSimulate) export(makeCompPanel) export(makeDiffPanel) export(makeOverallPanel) +export(mfaEstimate) +export(mfaSimulate) export(newLun2Params) export(newLunParams) +export(newMFAParams) export(newSCDDParams) export(newSimpleParams) export(newSplatParams) @@ -43,6 +48,7 @@ export(splatSimulateSingle) export(summariseDiff) exportClasses(Lun2Params) exportClasses(LunParams) +exportClasses(MFAParams) exportClasses(SCDDParams) exportClasses(SimpleParams) exportClasses(SplatParams) diff --git a/R/AllClasses.R b/R/AllClasses.R index 36fe0b311b765b47b6d617e2ee328af9c8eb2ad4..536efced17047cef1f4b74ba0cca485c5cf6f904 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -7,12 +7,12 @@ #' The Params class defines the following parameters: #' #' \describe{ -#' \item{\code{[nGenes]}}{The number of genes to simulate.} -#' \item{\code{[nCells]}}{The number of cells to simulate.} -#' \item{\code{seed}}{Seed to use for generating random numbers.} +#' \item{\code{nGenes}}{The number of genes to simulate.} +#' \item{\code{nCells}}{The number of cells to simulate.} +#' \item{\code{[seed]}}{Seed to use for generating random numbers.} #' } #' -#' The parameters shown in brackets can be estimated from real data. +#' The parameters not shown in brackets can be estimated from real data. #' #' @name Params #' @rdname Params @@ -460,3 +460,43 @@ setClass("SCDDParams", modeFC = c(2, 3, 4), varInflation = c(1, 1), condition = "condition")) + +#' The MFAParams class +#' +#' S4 class that holds parameters for the mfa simulation. +#' +#' @section Parameters: +#' +#' The mfa simulation uses the following parameters: +#' +#' \describe{ +#' \item{\code{nGenes}}{The number of genes to simulate.} +#' \item{\code{nCells}}{The number of cells to simulate.} +#' \item{\code{[seed]}}{Seed to use for generating random numbers.} +#' \item{\code{[trans.prop]}}{Proportion of genes that show transient +#' expression. These genes are briefly up or down-regulated before returning +#' to their initial state} +#' \item{\code{[zero.neg]}}{Logical. Whether to set negative expression +#' values to zero. This will zero-inflate the data.} +#' \item{\code{[dropout.present]}}{Logical. Whether to simulate dropout.} +#' \item{\code{dropout.lambda}}{Lambda parameter for the exponential +#' dropout function.} +#' } +#' +#' The parameters not shown in brackets can be estimated from real data using +#' \code{\link{mfaEstimate}}. See \code{\link[mfa]{create_synthetic}} for more +#' details about the parameters. For details of the Splatter implementation of +#' the mfa simulation see \code{\link{mfaSimulate}}. +#' +#' @name MFAParams +#' @rdname MFAParams +#' @aliases MFAParams-class +#' @exportClass MFAParams +setClass("MFAParams", + contains = "Params", + slots = c(trans.prop = "numeric", + zero.neg = "logical", + dropout.present = "logical", + dropout.lambda = "numeric"), + prototype = prototype(trans.prop = 0, zero.neg = TRUE, + dropout.present = FALSE, dropout.lambda = 1)) diff --git a/R/MFAParams-methods.R b/R/MFAParams-methods.R new file mode 100644 index 0000000000000000000000000000000000000000..2ff3e54d5f8730b93e7c8cf6789923c2fc148f12 --- /dev/null +++ b/R/MFAParams-methods.R @@ -0,0 +1,52 @@ +#' @rdname newParams +#' @importFrom methods new +#' @export +newMFAParams <- function(...) { + + if (!requireNamespace("mfa", quietly = TRUE)) { + stop("The mfa simulation requires the 'mfa' package.") + } + + params <- new("MFAParams") + params <- setParams(params, ...) + + return(params) +} + +setValidity("MFAParams", function(object) { + + v <- getParams(object, slotNames(object)) + + checks <- c(nGenes = checkmate::checkInt(v$nGenes, lower = 1), + nCells = checkmate::checkInt(v$nCells, lower = 1), + trans.prop = checkmate::checkNumber(v$trans.prop, lower = 0, + upper = 1), + zero.neg = checkmate::checkLogical(v$zero.neg, + any.missing = FALSE, + len = 1), + dropout.present = checkmate::checkLogical(v$dropout.present, + any.missing = FALSE, + len = 1), + dropout.lambda = checkmate::checkNumber(v$dropout.lambda), + seed = checkmate::checkInt(v$seed, lower = 0)) + + if (all(checks == TRUE)) { + valid <- TRUE + } else { + valid <- checks[checks != TRUE] + valid <- paste(names(valid), valid, sep = ": ") + } + + return(valid) +}) + +setMethod("show", "MFAParams", function(object) { + + pp <- list("Transient:" = c("[Proportion]" = "trans.prop"), + "Negative:" = c("[Zero]" = "zero.neg"), + "Dropout:" = c("[Present]" = "dropout.present", + "(Lambda)" = "dropout.lambda")) + + callNextMethod() + showPP(object, pp) +}) diff --git a/R/listSims.R b/R/listSims.R index 359e8f0ceca22ca0dd01e3a4eece273432ab1846..51fb3ff91491b274ff28f5e82d0187bb523c63ba 100644 --- a/R/listSims.R +++ b/R/listSims.R @@ -15,7 +15,8 @@ listSims <- function(print = TRUE) { sims <- list(c("Splat", "splat", "", "", "The Splat simulation generates means from a gamma distribution, adjusts them for BCV and generates counts from - a gamma-poisson. Dropout can be optionally added."), + a gamma-poisson. Dropout and batch effects can be optionally + added."), c("Splat Single", "splatSingle", "", "", "The Splat simulation with a single population."), c("Splat Groups", "splatGroups", "", "", @@ -46,12 +47,17 @@ listSims <- function(print = TRUE) { "kdkorthauer/scDD", "The scDD simulation samples a given dataset and can simulate differentially expressed and differentially - distributed genes between two conditions.")) + distributed genes between two conditions."), + c("mfa", "mfa", "10.12688/wellcomeopenres.11087.1", + "kieranrcampbell/mfa", + "The mfa simulation produces a bifurcating pseudotime + trajectory. This can optionally include genes with transient + changes in expression and added dropout.")) sims.table <- data.frame(Name = rep(NA, length(sims)), Prefix = rep(NA, length(sims)), DOI = rep(NA, length(sims)), - Github = rep(NA, length(sims)), + GitHub = rep(NA, length(sims)), Description = rep(NA, length(sims))) for (idx in seq_along(sims)) { @@ -65,7 +71,7 @@ listSims <- function(print = TRUE) { for (idx in seq_len(nrow(sims.table))) { sim <- as.character(sims.table[idx, ]) cat(sim[1], paste0("(", sim[2], ")"), "\n") - cat("DOI:", sim[3], "\t", "Github:", sim[4], "\n") + cat("DOI:", sim[3], "\t", "GitHub:", sim[4], "\n") cat(sim[5], "\n\n") } } diff --git a/R/mfa-estimate.R b/R/mfa-estimate.R new file mode 100644 index 0000000000000000000000000000000000000000..f3abf2f12b4f853fa060a2fdad398be24b077d57 --- /dev/null +++ b/R/mfa-estimate.R @@ -0,0 +1,46 @@ +#' Estimate mfa simulation parameters +#' +#' Estimate simulation parameters for the mfa simulation from a real dataset. +#' +#' @param counts either a counts matrix or a SingleCellExperiment object +#' containing count data to estimate parameters from. +#' @param params MFAParams object to store estimated values in. +#' +#' @details +#' The \code{nGenes} and \code{nCells} parameters are taken from the size of the +#' input data. The dropout lambda parameter is estimate using +#' \code{\link[mfa]{empirical_lambda}}. See \code{\link{MFAParams}} for more +#' details on the parameters. +#' +#' @return MFAParams object containing the estimated parameters. +#' +#' @examples +#' data("sc_example_counts") +#' params <- mfaEstimate(sc_example_counts) +#' params +#' @export +mfaEstimate <- function(counts, params = newMFAParams()) { + UseMethod("mfaEstimate") +} + +#' @rdname mfaEstimate +#' @export +mfaEstimate.SingleCellExperiment <- function(counts, + params = newMFAParams()) { + counts <- BiocGenerics::counts(counts) + mfaEstimate(counts, params) +} + +#' @rdname mfaEstimate +#' @export +mfaEstimate.matrix <- function(counts, params = newMFAParams()) { + + checkmate::assertClass(params, "MFAParams") + + dropout.lambda <- mfa::empirical_lambda(t(counts)) + + params <- setParams(params, nGenes = nrow(counts), nCells = ncol(counts), + dropout.lambda = dropout.lambda) + + return(params) +} diff --git a/R/mfa-simulate.R b/R/mfa-simulate.R new file mode 100644 index 0000000000000000000000000000000000000000..70d171dc0ba7ede8c64703116b81e8bfbf5fbb72 --- /dev/null +++ b/R/mfa-simulate.R @@ -0,0 +1,84 @@ +#' MFA simulation +#' +#' Simulate a bifurcating pseudotime path using the mfa method. +#' +#' @param params MFAParams object containing simulation parameters. +#' @param verbose Logical. Whether to print progress messages. +#' @param ... any additional parameter settings to override what is provided in +#' \code{params}. +#' +#' @details +#' This function is just a wrapper around \code{\link[mfa]{create_synthetic}} +#' that takes a \code{\link{MFAParams}}, runs the simulation then converts the +#' output to a \code{\link[SingleCellExperiment]{SingleCellExperiment}} object. +#' See \code{\link[mfa]{create_synthetic}} and the mfa paper for more details +#' about how the simulation works. +#' +#' @return SingleCellExperiment containing simulated counts +#' +#' @references +#' Campbell KR, Yau C. Probabilistic modeling of bifurcations in single-cell +#' gene expression data using a Bayesian mixture of factor analyzers. Wellcome +#' Open Research (2017). +#' +#' Paper: \url{10.12688/wellcomeopenres.11087.1} +#' +#' Code: \url{https://github.com/kieranrcampbell/mfa} +#' +#' @examples +#' sim <- mfaSimulate() +#' @export +mfaSimulate <- function(params = newMFAParams(), verbose = TRUE, ...) { + + checkmate::assertClass(params, "MFAParams") + params <- setParams(params, ...) + + # Set random seed + seed <- getParam(params, "seed") + set.seed(seed) + + # Get the parameters we are going to use + nCells <- getParam(params, "nCells") + nGenes <- getParam(params, "nGenes") + trans.prop <- getParam(params, "trans.prop") + zero.neg <- getParam(params, "zero.neg") + dropout.present <- getParam(params, "dropout.present") + dropout.lambda <- getParam(params, "dropout.lambda") + + if (verbose) {message("Simulating counts...")} + mfa.sim <- mfa::create_synthetic(C = nCells, + G = nGenes, + p_transient = trans.prop, + zero_negative = zero.neg, + model_dropout = dropout.present, + lambda = dropout.lambda) + + if (verbose) {message("Creating final dataset...")} + cell.names <- paste0("Cell", seq_len(nCells)) + gene.names <- paste0("Gene", seq_len(nGenes)) + + counts <- t(mfa.sim$X) + rownames(counts) <- gene.names + colnames(counts) <- cell.names + + cells <- data.frame(Cell = cell.names, + Branch = mfa.sim$branch, + Pseudotime = mfa.sim$pst) + rownames(cells) <- cell.names + + features <- data.frame(Gene = gene.names, + KBranch1 = mfa.sim$k[, 1], + KBranch2 = mfa.sim$k[, 2], + PhiBranch1 = mfa.sim$phi[, 1], + PhiBranch2 = mfa.sim$phi[, 2], + DeltaBranch1 = mfa.sim$delta[, 1], + DeltaBranch2 = mfa.sim$delta[, 2]) + rownames(features) <- gene.names + + sim <- SingleCellExperiment(assays = list(counts = counts), + rowData = features, + colData = cells, + metadata = list(params = params)) + + return(sim) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 7da353f10dddc7efc64060ee37b857127c048215..cd4c1f15af8c7b76d2e5256036d63835e03d3ba6 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -12,6 +12,7 @@ reference: - '`getParams`' - '`Lun2Params`' - '`LunParams`' + - '`MFAParams`' - '`newParams`' - '`Params`' - '`SCDDParams`' @@ -24,6 +25,7 @@ reference: contents: - '`lun2Estimate`' - '`lunEstimate`' + - '`mfaEstimate`' - '`scDDEstimate`' - '`simpleEstimate`' - '`splatEstBCV`' @@ -37,6 +39,7 @@ reference: contents: - '`lun2Simulate`' - '`lunSimulate`' + - '`mfaSimulate`' - '`scDDSimulate`' - '`simpleSimulate`' - '`splatSimBatchCellMeans`' diff --git a/man/MFAParams.Rd b/man/MFAParams.Rd new file mode 100644 index 0000000000000000000000000000000000000000..98513986450613ef64bad9e52401da3085e1c54c --- /dev/null +++ b/man/MFAParams.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClasses.R +\docType{class} +\name{MFAParams} +\alias{MFAParams} +\alias{MFAParams-class} +\title{The MFAParams class} +\description{ +S4 class that holds parameters for the mfa simulation. +} +\section{Parameters}{ + + +The mfa simulation uses the following parameters: + +\describe{ + \item{\code{nGenes}}{The number of genes to simulate.} + \item{\code{nCells}}{The number of cells to simulate.} + \item{\code{[seed]}}{Seed to use for generating random numbers.} + \item{\code{[trans.prop]}}{Proportion of genes that show transient + expression. These genes are briefly up or down-regulated before returning + to their initial state} + \item{\code{[zero.neg]}}{Logical. Whether to set negative expression + values to zero. This will zero-inflate the data.} + \item{\code{[dropout.present]}}{Logical. Whether to simulate dropout.} + \item{\code{dropout.lambda}}{Lambda parameter for the exponential + dropout function.} +} + +The parameters not shown in brackets can be estimated from real data using +\code{\link{mfaEstimate}}. See \code{\link[mfa]{create_synthetic}} for more +details about the parameters. For details of the Splatter implementation of +the mfa simulation see \code{\link{mfaSimulate}}. +} + diff --git a/man/Params.Rd b/man/Params.Rd index c8fbe4566a83eea6ec35f0ee19667cbd905fd183..156c6c4e204c14862bed13e076e70900f2543d36 100644 --- a/man/Params.Rd +++ b/man/Params.Rd @@ -14,11 +14,11 @@ Virtual S4 class that all other Params classes inherit from. The Params class defines the following parameters: \describe{ - \item{\code{[nGenes]}}{The number of genes to simulate.} - \item{\code{[nCells]}}{The number of cells to simulate.} - \item{\code{seed}}{Seed to use for generating random numbers.} + \item{\code{nGenes}}{The number of genes to simulate.} + \item{\code{nCells}}{The number of cells to simulate.} + \item{\code{[seed]}}{Seed to use for generating random numbers.} } -The parameters shown in brackets can be estimated from real data. +The parameters not shown in brackets can be estimated from real data. } diff --git a/man/mfaEstimate.Rd b/man/mfaEstimate.Rd new file mode 100644 index 0000000000000000000000000000000000000000..002cd047b58ec935bdd1f421756597d8cedb6549 --- /dev/null +++ b/man/mfaEstimate.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mfa-estimate.R +\name{mfaEstimate} +\alias{mfaEstimate} +\alias{mfaEstimate.SingleCellExperiment} +\alias{mfaEstimate.matrix} +\title{Estimate mfa simulation parameters} +\usage{ +mfaEstimate(counts, params = newMFAParams()) + +\method{mfaEstimate}{SingleCellExperiment}(counts, params = newMFAParams()) + +\method{mfaEstimate}{matrix}(counts, params = newMFAParams()) +} +\arguments{ +\item{counts}{either a counts matrix or a SingleCellExperiment object +containing count data to estimate parameters from.} + +\item{params}{MFAParams object to store estimated values in.} +} +\value{ +MFAParams object containing the estimated parameters. +} +\description{ +Estimate simulation parameters for the mfa simulation from a real dataset. +} +\details{ +The \code{nGenes} and \code{nCells} parameters are taken from the size of the +input data. The dropout lambda parameter is estimate using +\code{\link[mfa]{empirical_lambda}}. See \code{\link{MFAParams}} for more +details on the parameters. +} +\examples{ +data("sc_example_counts") +params <- mfaEstimate(sc_example_counts) +params +} diff --git a/man/mfaSimulate.Rd b/man/mfaSimulate.Rd new file mode 100644 index 0000000000000000000000000000000000000000..7f7e3566fe2737ddedf59c864bc3466efda453e1 --- /dev/null +++ b/man/mfaSimulate.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mfa-simulate.R +\name{mfaSimulate} +\alias{mfaSimulate} +\title{MFA simulation} +\usage{ +mfaSimulate(params = newMFAParams(), verbose = TRUE, ...) +} +\arguments{ +\item{params}{MFAParams object containing simulation parameters.} + +\item{verbose}{Logical. Whether to print progress messages.} + +\item{...}{any additional parameter settings to override what is provided in +\code{params}.} +} +\value{ +SingleCellExperiment containing simulated counts +} +\description{ +Simulate a bifurcating pseudotime path using the mfa method. +} +\details{ +This function is just a wrapper around \code{\link[mfa]{create_synthetic}} +that takes a \code{\link{MFAParams}}, runs the simulation then converts the +output to a \code{\link[SingleCellExperiment]{SingleCellExperiment}} object. +See \code{\link[mfa]{create_synthetic}} and the mfa paper for more details +about how the simulation works. +} +\examples{ +sim <- mfaSimulate() +} +\references{ +Campbell KR, Yau C. Probabilistic modeling of bifurcations in single-cell +gene expression data using a Bayesian mixture of factor analyzers. Wellcome +Open Research (2017). + +Paper: \url{10.12688/wellcomeopenres.11087.1} + +Code: \url{https://github.com/kieranrcampbell/mfa} +} diff --git a/man/newParams.Rd b/man/newParams.Rd index 197dc0204454f31bf20343896156205931392de4..de3e6b85de30198d6b9b53158119455e52e40d50 100644 --- a/man/newParams.Rd +++ b/man/newParams.Rd @@ -1,11 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, -% R/LunParams-methods.R, R/SCDDParams-methods.R, R/SimpleParams-methods.R, -% R/SplatParams-methods.R +% R/LunParams-methods.R, R/MFAParams-methods.R, R/SCDDParams-methods.R, +% R/SimpleParams-methods.R, R/SplatParams-methods.R \name{newParams} \alias{newParams} \alias{newLun2Params} \alias{newLunParams} +\alias{newMFAParams} \alias{newSCDDParams} \alias{newSimpleParams} \alias{newSplatParams} @@ -15,6 +16,8 @@ newLun2Params(...) newLunParams(...) +newMFAParams(...) + newSCDDParams(...) newSimpleParams(...) diff --git a/tests/testthat/test-mfa-simulate.R b/tests/testthat/test-mfa-simulate.R new file mode 100644 index 0000000000000000000000000000000000000000..877e0a9686460f9ec84fe107c0fb1397da969723 --- /dev/null +++ b/tests/testthat/test-mfa-simulate.R @@ -0,0 +1,6 @@ +context("mfa simulation") + +test_that("mfa simulation output is valid", { + expect_true(validObject(mfaSimulate())) + expect_true(validObject(mfaSimulate(dropout.present = TRUE))) +}) diff --git a/tests/testthat/test-simulate-simple.R b/tests/testthat/test-simple-simulate.R similarity index 100% rename from tests/testthat/test-simulate-simple.R rename to tests/testthat/test-simple-simulate.R