diff --git a/DESCRIPTION b/DESCRIPTION index 8d6ed5f3936c6bef47b00f601fae2b6db5231f65..a57545e5fce95b0cf04eae188341fa5d6785ad7a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,7 +54,8 @@ Suggests: scran, mfa, phenopath, - zinbwave + zinbwave, + BASiCS biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing, Software URL: https://github.com/Oshlack/splatter diff --git a/NAMESPACE b/NAMESPACE index 4cfad39e5ef56f527991f6f4713875addfdcd440..642439f7ddb59588e139aa67551ae8d178874290 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(BASiCSEstimate,SCESet) +S3method(BASiCSEstimate,matrix) S3method(lun2Estimate,SingleCellExperiment) S3method(lun2Estimate,matrix) S3method(lunEstimate,SingleCellExperiment) @@ -17,6 +19,8 @@ S3method(splatEstimate,SingleCellExperiment) S3method(splatEstimate,matrix) S3method(zinbEstimate,SingleCellExperiment) S3method(zinbEstimate,matrix) +export(BASiCSEstimate) +export(BASiCSSimulate) export(addGeneLengths) export(compareSCEs) export(diffSCEs) @@ -32,6 +36,7 @@ export(makeDiffPanel) export(makeOverallPanel) export(mfaEstimate) export(mfaSimulate) +export(newBASiCSParams) export(newLun2Params) export(newLunParams) export(newMFAParams) @@ -56,6 +61,7 @@ export(splatSimulateSingle) export(summariseDiff) export(zinbEstimate) export(zinbSimulate) +exportClasses(BASiCSParams) exportClasses(Lun2Params) exportClasses(LunParams) exportClasses(MFAParams) @@ -109,6 +115,7 @@ importFrom(methods,new) importFrom(methods,slot) importFrom(methods,slotNames) importFrom(methods,validObject) +importFrom(scater,newSCESet) importFrom(stats,dnbinom) importFrom(stats,median) importFrom(stats,model.matrix) diff --git a/R/AllClasses.R b/R/AllClasses.R index ac6cdc094f8027b1d2a36c0a87713a81cd383ac9..b0a69d8d365bda833e71260d025458e7c8730557 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -308,7 +308,7 @@ setClass("LunParams", #' The Lun2Params class #' -#' S4 class that holds parameters for the Lun simulation. +#' S4 class that holds parameters for the Lun2 simulation. #' #' @section Parameters: #' @@ -461,6 +461,90 @@ setClass("SCDDParams", varInflation = c(1, 1), condition = "condition")) +#' The BASiCSParams class +#' +#' S4 class that holds parameters for the BASiCS simulation. +#' +#' @section Parameters: +#' +#' The BASiCS simulation uses the following parameters: +#' \describe{ +#' \item{\code{nGenes}}{The number of genes to simulate.} +#' \item{\code{nCells}}{The number of cells to simulate.} +#' \item{\code{[seed]}}{Seed to use for generating random numbers.} +#' \item{\emph{Batch parameters}}{ +#' \describe{ +#' \item{\code{nBatches}}{Number of batches to simulate.} +#' \item{\code{batchCells}}{Number of cells in each batch.} +#' } +#' } +#' \item{\emph{Gene parameters}}{ +#' \describe{ +#' \item{\code{gene.params}}{A \code{data.frame} containing gene +#' parameters with two coloumns: \code{Mean} (mean expression for +#' each biological gene) and \code{Delta} (cell-to-cell +#' heterogeneity for each biological gene).} +#' } +#' } +#' \item{\emph{Spike-in parameters}}{ +#' \describe{ +#' \item{\code{nSpikes}}{The number of spike-ins to simulate.} +#' \item{\code{spike.means}}{Input molecules for each spike-in.} +#' } +#' } +#' \item{\emph{Cell parameters}}{ +#' \describe{ +#' \item{\code{cell.params}}{A \code{data.frame} containing gene +#' parameters with two coloumns: \code{Phi} (mRNA content factor for +#' each cell, scaled to sum to the number of cells in each batch) +#' and \code{S} (capture efficient for each cell).} +#' } +#' } +#' \item{\emph{Variability parameters}}{ +#' \describe{ +#' \item{\code{theta}}{Technical variability parameter for each +#' batch.} +#' } +#' } +#' } +#' +#' The parameters not shown in brackets can be estimated from real data using +#' \code{\link{BASiCSEstimate}}. For details of the BASiCS simulation see +#' \code{\link{BASiCSSimulate}}. +#' +#' @name BASiCSParams +#' @rdname BASiCSParams +#' @aliases BASiCSParams-class +#' @exportClass BASiCSParams +setClass("BASiCSParams", + contains = "Params", + slots = c(nBatches = "numeric", + batchCells = "numeric", + gene.params = "data.frame", + nSpikes = "numeric", + spike.means = "numeric", + cell.params = "data.frame", + theta = "numeric"), + prototype = prototype(nBatches = 1, + batchCells = 100, + gene.params = + data.frame( + Mean = c(8.36, 10.65, 4.88, 6.29, 21.72, + 12.93, 30.19), + Delta = c(1.29, 0.88, 1.51, 1.49, 0.54, + 0.40, 0.85) + ), + nSpikes = 5, + spike.means = c(12.93, 30.19, 1010.72, 7.90, + 31.59), + cell.params = + data.frame( + Phi = c(1.00, 1.06, 1.09, 1.05, 0.80), + S = c(0.38, 0.40, 0.38, 0.39, 0.34) + ), + theta = 0.39) +) + #' The MFAParams class #' #' S4 class that holds parameters for the mfa simulation. @@ -468,7 +552,6 @@ setClass("SCDDParams", #' @section Parameters: #' #' The mfa simulation uses the following parameters: -#' #' \describe{ #' \item{\code{nGenes}}{The number of genes to simulate.} #' \item{\code{nCells}}{The number of cells to simulate.} diff --git a/R/BASiCS-estimate.R b/R/BASiCS-estimate.R new file mode 100644 index 0000000000000000000000000000000000000000..e29e6a0e662edc136bc53f1a3bbd78e8704fa469 --- /dev/null +++ b/R/BASiCS-estimate.R @@ -0,0 +1,142 @@ +#' Estimate BASiCS simulation parameters +#' +#' Estimate simulation parameters for the BASiCS simulation from a real dataset. +#' +#' @param counts either a counts matrix or an SCESet object containing count +#' data to estimate parameters from. +#' @param spike.info data.frame describing spike-ins with two columns: "Name" +#' giving the names of the spike-in features (must match +#' \code{rownames(counts)}) and "Input" giving the number of input +#' molecules. +#' @param batch vector giving the batch that each cell belongs to. +#' @param n total number of MCMC iterations. Must be \code{>= max(4, thin)} and +#' a multiple of \code{thin}. +#' @param thin thining period for the MCMC sampler. Must be \code{>= 2}. +#' @param burn burn-in period for the MCMC sampler. Must be in the range +#' \code{1 <= burn < n} and a multiple of \code{thin}. +#' @param params BASiCSParams object to store estimated values in. +#' @param verbose logical. Whether to print progress messages. +#' @param progress logical. Whether to print additional BASiCS progress +#' messages. +#' @param ... Optional parameters passed to \code{\link[BASiCS]{BASiCS_MCMC}}. +#' +#' @details +#' This function is just a wrapper around \code{\link[BASiCS]{BASiCS_MCMC}} that +#' takes the output and converts it to a BASiCSParams object. Either a set of +#' spike-ins or batch information (or both) must be supplied. If only batch +#' information is provided there must be at least two batches. See +#' \code{\link[BASiCS]{BASiCS_MCMC}} for details. +#' +#' @return BASiCSParams object containing the estimated parameters. +#' +#' @examples +#' \dontrun{ +#' data("sc_example_counts") +#' spike.info <- data.frame(Name = rownames(sc_example_counts)[1:10], +#' Input = rnorm(10, 500, 200), +#' stringsAsFactors = FALSE) +#' params <- BASiCSEstimate(sc_example_counts[1:50, 1:20], +#' spike.info) +#' params +#' } +#' @export +BASiCSEstimate <- function(counts, spike.info = NULL, batch = NULL, + n = 20000, thin = 10, burn = 5000, + params = newBASiCSParams(), verbose = TRUE, + progress = TRUE, ...) { + UseMethod("BASiCSEstimate") +} + +#' @rdname BASiCSEstimate +#' @export +BASiCSEstimate.SCESet <- function(counts, spike.info = NULL, batch = NULL, + n = 20000, thin = 10, burn = 5000, + params = newBASiCSParams(), verbose = TRUE, + progress = TRUE, ...) { + counts <- BiocGenerics::counts(counts) + BASiCSEstimate(counts, params) +} + +#' @rdname BASiCSEstimate +#' @export +BASiCSEstimate.matrix <- function(counts, spike.info = NULL, batch = NULL, + n = 20000, thin = 10, burn = 5000, + params = newBASiCSParams(), verbose = TRUE, + progress = TRUE, ...) { + + checkmate::assertClass(params, "BASiCSParams") + checkmate::assertMatrix(counts, mode = "numeric", any.missing = FALSE, + min.rows = 1, min.cols = 1, row.names = "unique", + col.names = "unique") + if (is.null(spike.info) && is.null(batch)) { + stop("At least one of spike.info and batch must be provided") + } + if (!is.null(spike.info)) { + checkmate::assertDataFrame(spike.info, any.missing = FALSE, + min.rows = 1, ncols = 2) + if (!all(colnames(spike.info) == c("Name", "Input"))) { + stop("spike.info must have columns named 'Name' and 'Input'") + } + checkmate::assertCharacter(spike.info$Name, min.chars = 1, + unique = TRUE) + checkmate::assertNumeric(spike.info$Input, lower = 0, finite = TRUE) + } #else { + # spike.info <- data.frame(Name = c(), Input = c()) + #} + if (!is.null(batch)) { + checkmate::assertIntegerish(batch, lower = 0, any.missing = FALSE, + len = ncol(counts)) + if (is.null(spike.info) && length(unique(batch)) == 1) { + stop("If spike.info is not provided there must be at least two ", + "batches") + } + } else { + batch <- rep(1, ncol(counts)) + } + checkmate::assertInt(thin, lower = 2) + checkmate::assertInt(n, lower = max(4, thin)) + if ((n %% thin) != 0) { + stop("'n' must be a multiple of 'thin'") + } + checkmate::assertInt(burn, lower = 1, upper = n - 1) + if ((burn %% thin) != 0) { + stop("'burn' must be a multiple of 'thin'") + } + + is.spike <- rownames(counts) %in% spike.info$Name + BASiCS.data <- suppressMessages( + BASiCS::newBASiCS_Data(counts, is.spike, spike.info, + batch) + ) + + if (verbose) { + mcmc <- BASiCS::BASiCS_MCMC(Data = BASiCS.data, N = n, Thin = thin, + Burn = burn, PrintProgress = progress, ...) + } else { + mcmc <- suppressMessages( + BASiCS::BASiCS_MCMC(Data = BASiCS.data, N = n, Thin = thin, + Burn = burn, PrintProgress = progress, + ...) + ) + } + + mcmc.summ <- BASiCS::Summary(mcmc) + + means <- BASiCS::displaySummaryBASiCS(mcmc.summ, Param = "mu")[, 1] + deltas <- BASiCS::displaySummaryBASiCS(mcmc.summ, Param = "delta")[, 1] + phis <- BASiCS::displaySummaryBASiCS(mcmc.summ, Param = "phi")[, 1] + ss <- BASiCS::displaySummaryBASiCS(mcmc.summ, Param = "s")[, 1] + thetas <- BASiCS::displaySummaryBASiCS(mcmc.summ, Param = "theta")[, 1] + + params <- setParams(params, + nGenes = sum(!is.spike), + batchCells = as.vector(table(batch)), + gene.params = data.frame(Mean = means, Delta = deltas), + nSpikes = sum(is.spike), + spike.means = ifelse(!is.null(spike.info), + spike.info$Input, numeric()), + cell.params = data.frame(Phi = phis, S = ss), + theta = thetas) + + return(params) +} diff --git a/R/BASiCS-simulate.R b/R/BASiCS-simulate.R new file mode 100644 index 0000000000000000000000000000000000000000..7b80d69fdc3b48d3c884def858696afca9731be5 --- /dev/null +++ b/R/BASiCS-simulate.R @@ -0,0 +1,139 @@ +#' BASiCS simulation +#' +#' Simulate counts using the BASiCS method. +#' +#' @param params BASiCSParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' @param ... any additional parameter settings to override what is provided in +#' \code{params}. +#' +#' @details +#' This function is just a wrapper around \code{\link[BASiCS]{BASiCS_Sim}} that +#' takes a \code{\link{BASiCSParams}}, runs the simulation then converts the +#' output to an \code{\link[scater]{SCESet}} object. See +#' \code{\link[BASiCS]{BASiCS_Sim}} for more details of how the simulation +#' works. +#' +#' @return SingleCellExperiment containing simulated counts +#' +#' @references +#' Vallejos CA, Marioni JC, Richardson S. BASiCS: Bayesian Analysis of +#' Single-Cell Sequencing data. PLoS Comput. Biol. (2015). +#' +#' Paper: \url{10.1371/journal.pcbi.1004333} +#' +#' Code: \url{https://github.com/catavallejos/BASiCS} +#' +#' @examples +#' sim <- BASiCSSimulate() +#' @export +#' @importFrom scater newSCESet +BASiCSSimulate <- function(params = newBASiCSParams(), verbose = TRUE, ...) { + + checkmate::assertClass(params, "BASiCSParams") + params <- setParams(params, ...) + params <- expandParams(params) + validObject(params) + + # Set random seed + seed <- getParam(params, "seed") + set.seed(seed) + + if (verbose) {message("Getting parameters...")} + nGenes <- getParam(params, "nGenes") + nCells <- getParam(params, "nCells") + nSpikes <- getParam(params, "nSpikes") + nBatches <- getParam(params, "nBatches") + batch.cells <- getParam(params, "batchCells") + gene.params <- getParam(params, "gene.params") + + # Sample gene.params if necessary + if (nrow(gene.params) != nGenes) { + warning("Number of gene.params not equal to nGenes, ", + "gene.params will be sampled.") + selected <- sample(nrow(gene.params), nGenes, replace = TRUE) + gene.params <- gene.params[selected, ] + } + + mu <- gene.params$Mean + delta <- gene.params$Delta + + if (nSpikes > 0) { + spike.mu <- getParam(params, "spike.means") + if (length(spike.mu) != nSpikes) { + warning("Number of spike-in means not equal to nSpikes, ", + "spike.means will be sampled.") + selected <- sample(length(spike.mu), nSpikes, replace = TRUE) + spike.mu <- spike.mu[selected] + } + } else { + # Create dummy spike-ins to get around BASiCS_Sim... + spike.mu <- c(10, 10) + } + + cell.params <- getParam(params, "cell.params") + if (nrow(cell.params) != nCells) { + warning("Number of cell.params not equal to nCells, ", + "cell.params will be sampled.") + selected <- sample(nrow(cell.params), nCells, replace = TRUE) + cell.params <- cell.params[selected, ] + } + + thetas <- getParam(params, "theta") + + batches <- lapply(seq_len(nBatches), function(i, b) {rep(i, b[i])}, + b = batch.cells) + batches <- unlist(batches) + + if (verbose) {message("Simulating counts with BASiCS...")} + counts.list <- list() + for (batch in seq_len(nBatches)) { + batch.cells <- batches == batch + phi <- cell.params[batch.cells, "Phi"] + phi <- (phi / sum(phi)) * sum(batch.cells) + s <- cell.params[batch.cells, "S"] + theta <- thetas[batch] + BASiCS.sim <- suppressMessages( + BASiCS::BASiCS_Sim(mu, spike.mu, delta, phi, s, theta) + ) + batch.counts <- SummarizedExperiment::assay(BASiCS.sim) + counts.list[[batch]] <- batch.counts + } + + counts <- do.call(cbind, counts.list) + + if (verbose) {message("Creating final dataset...")} + cell.names <- paste0("Cell", seq_len(nCells)) + gene.names <- paste0("Gene", seq_len(nGenes)) + if (nSpikes > 0) { + gene.names <- c(gene.names, paste0("Spike", seq_len(nSpikes))) + } else { + # Remove dummy spikes + counts <- counts[1:(nrow(counts) - 2), ] + spike.mu <- numeric() + } + + rownames(counts) <- gene.names + colnames(counts) <- cell.names + + cells <- data.frame(Cell = cell.names, + Phi = cell.params[, "Phi"], + S = cell.params[, "S"], + Batch = batches, + BatchTheta = thetas[batches]) + rownames(cells) <- cell.names + + features <- data.frame(Gene = gene.names, + Mean = c(mu, spike.mu), + Delta = c(delta, rep(NA, nSpikes)), + IsSpike = c(rep(FALSE, nGenes), rep(TRUE, nSpikes))) + rownames(features) <- gene.names + + sim <- SingleCellExperiment(assays = list(counts = counts), + rowData = features, + colData = cells, + metadata = list(params = params)) + + if (verbose) {message("Done!")} + return(sim) +} diff --git a/R/BASiCSParams-methods.R b/R/BASiCSParams-methods.R new file mode 100644 index 0000000000000000000000000000000000000000..90243b242b89972c980b1294ef23f4c78ba5a060 --- /dev/null +++ b/R/BASiCSParams-methods.R @@ -0,0 +1,125 @@ +#' @rdname newParams +#' @importFrom methods new +#' @export +newBASiCSParams <- function(...) { + + if (!requireNamespace("BASiCS", quietly = TRUE)) { + stop("The BASiCS simulation requires the 'BASiCS' package.") + } + + params <- new("BASiCSParams") + params <- setParams(params, ...) + + return(params) +} + +#' @importFrom checkmate checkInt checkDataFrame checkNumeric +setValidity("BASiCSParams", function(object) { + + object <- expandParams(object) + v <- getParams(object, slotNames(object)) + + nCells <- v$nCells + nGenes <- v$nGenes + nBatches <- v$nBatches + checks <- c(seed = checkInt(v$seed, lower = 0), + nGenes = checkInt(v$nGenes, lower = 1), + nCells = checkInt(v$nCells, lower = 1), + nBatches = checkInt(v$nBatches, lower = 1), + batchCells = checkIntegerish(v$batchCells, lower = 1, + len = nBatches), + gene.params = checkDataFrame(v$gene.params, + types = "numeric", + any.missing = FALSE, + min.rows = 1, ncols = 2), + nSpikes = checkNumber(v$nSpikes, lower = 0, finite = TRUE), + spike.means = checkNumeric(v$spike.means, lower = 0, + finite = TRUE), + cell.params = checkDataFrame(v$cell.params, + types = "numeric", + any.missing = FALSE, + min.rows = 1, ncols = 2), + theta = checkNumeric(v$theta, lower = 0, len = nBatches, + finite = TRUE) + ) + + if (!all(colnames(v$gene.params) == c("Mean", "Delta"))) { + checks <- c(checks, gene.params = "Incorrect column names") + } + + if (!all(colnames(v$cell.params) == c("Phi", "S"))) { + checks <- c(checks, cell.params = "Incorrect column names") + } + + # Check batchCells matches nCells, nBatches + if (nCells != sum(v$batchCells) || nBatches != length(v$batchCells)) { + checks <- c(checks, + "nCells, nBatches and batchCells are not consistent") + } + + if (all(checks == TRUE)) { + valid <- TRUE + } else { + valid <- checks[checks != TRUE] + valid <- paste(names(valid), valid, sep = ": ") + } + + return(valid) +}) + +#' @rdname setParam +setMethod("setParam", "BASiCSParams",function(object, name, value) { + checkmate::assertString(name) + + if (name == "nCells" || name == "nBatches") { + stop(name, " cannot be set directly, set batchCells instead") + } + + if (name == "batchCells") { + object <- setParamUnchecked(object, "nCells", sum(value)) + object <- setParamUnchecked(object, "nBatches", length(value)) + } + + object <- callNextMethod() + + return(object) +}) + +setMethod("show", "BASiCSParams", function(object) { + + pp <- list("Batches:" = c("(Batches)" = "nBatches", + "(Batch Cells)" = "batchCells"), + "Spike-ins:" = c("(Number)" = "nSpikes", + "(Means)" = "spike.means"), + "Variability:" = c("(Theta)" = "theta")) + + callNextMethod() + + gene.params <- getParam(object, "gene.params") + cell.params <- getParam(object, "cell.params") + cat("Genes:", "\n") + cat("(Params)", "\n") + cat("data.frame with", dim(gene.params)[1], "features\n") + print(head(gene.params, n = 3)) + cat(" ... ...\n\n") + + cat("Cells:", "\n") + cat("(Params)", "\n") + cat("data.frame with", dim(cell.params)[1], "features\n") + print(head(cell.params, n = 3)) + cat(" ... ...\n\n") + + showPP(object, pp) +}) + +#' @rdname expandParams +setMethod("expandParams", "BASiCSParams", function(object) { + + n <- getParam(object, "nBatches") + + vectors <- c("theta") + + object <- callNextMethod(object, vectors, n) + + return(object) +}) diff --git a/R/listSims.R b/R/listSims.R index cbf18efc81cb5b9e57531543cd2d03b4b303ac6b..fd19aab2e122981f4837a4ba02611c06f97c68e8 100644 --- a/R/listSims.R +++ b/R/listSims.R @@ -48,6 +48,11 @@ listSims <- function(print = TRUE) { "The scDD simulation samples a given dataset and can simulate differentially expressed and differentially distributed genes between two conditions."), + c("BASiCS", "BASiCS", "10.1371/journal.pcbi.1004333", + "catavallejos/BASiCS", + "The BASiCS simulation is based on a bayesian model used to + deconvolve biological and technical variation and + includes spike-ins and batch effects."), c("mfa", "mfa", "10.12688/wellcomeopenres.11087.1", "kieranrcampbell/mfa", "The mfa simulation produces a bifurcating pseudotime diff --git a/_pkgdown.yml b/_pkgdown.yml index 74aacfba6c37242e8a30deae1f51dd0cf058329a..5a8f3a7499fc3c12f44f98f77d44b9f271916ced 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -8,23 +8,25 @@ reference: - title: Parameters desc: Parameters functions and classes contents: + - '`newParams`' - '`getParam`' - '`getParams`' + - '`setParam`' + - '`setParams`' + - '`BASiCSParams`' - '`Lun2Params`' - '`LunParams`' - '`MFAParams`' - - '`newParams`' - '`Params`' - '`PhenoParams`' - '`SCDDParams`' - '`SimpleParams`' - '`SplatParams`' - - '`setParam`' - - '`setParams`' - '`ZINBParams`' - title: Estimation desc: Functions for estimating parameters contents: + - '`BASiCSEstimate`' - '`lun2Estimate`' - '`lunEstimate`' - '`mfaEstimate`' @@ -41,6 +43,7 @@ reference: - title: Simulation desc: Functions for simulating datasets contents: + - '`BASiCSSimulate`' - '`lun2Simulate`' - '`lunSimulate`' - '`mfaSimulate`' diff --git a/man/BASiCSEstimate.Rd b/man/BASiCSEstimate.Rd new file mode 100644 index 0000000000000000000000000000000000000000..0fabfebad0e5f63082e073c8eb9913d0bb97bae3 --- /dev/null +++ b/man/BASiCSEstimate.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/BASiCS-estimate.R +\name{BASiCSEstimate} +\alias{BASiCSEstimate} +\alias{BASiCSEstimate.SCESet} +\alias{BASiCSEstimate.matrix} +\title{Estimate BASiCS simulation parameters} +\usage{ +BASiCSEstimate(counts, spike.info = NULL, batch = NULL, n = 20000, + thin = 10, burn = 5000, params = newBASiCSParams(), verbose = TRUE, + progress = TRUE, ...) + +\method{BASiCSEstimate}{SCESet}(counts, spike.info = NULL, batch = NULL, + n = 20000, thin = 10, burn = 5000, params = newBASiCSParams(), + verbose = TRUE, progress = TRUE, ...) + +\method{BASiCSEstimate}{matrix}(counts, spike.info = NULL, batch = NULL, + n = 20000, thin = 10, burn = 5000, params = newBASiCSParams(), + verbose = TRUE, progress = TRUE, ...) +} +\arguments{ +\item{counts}{either a counts matrix or an SCESet object containing count +data to estimate parameters from.} + +\item{spike.info}{data.frame describing spike-ins with two columns: "Name" +giving the names of the spike-in features (must match +\code{rownames(counts)}) and "Input" giving the number of input +molecules.} + +\item{batch}{vector giving the batch that each cell belongs to.} + +\item{n}{total number of MCMC iterations. Must be \code{>= max(4, thin)} and +a multiple of \code{thin}.} + +\item{thin}{thining period for the MCMC sampler. Must be \code{>= 2}.} + +\item{burn}{burn-in period for the MCMC sampler. Must be in the range +\code{1 <= burn < n} and a multiple of \code{thin}.} + +\item{params}{BASiCSParams object to store estimated values in.} + +\item{verbose}{logical. Whether to print progress messages.} + +\item{progress}{logical. Whether to print additional BASiCS progress +messages.} + +\item{...}{Optional parameters passed to \code{\link[BASiCS]{BASiCS_MCMC}}.} +} +\value{ +BASiCSParams object containing the estimated parameters. +} +\description{ +Estimate simulation parameters for the BASiCS simulation from a real dataset. +} +\details{ +This function is just a wrapper around \code{\link[BASiCS]{BASiCS_MCMC}} that +takes the output and converts it to a BASiCSParams object. Either a set of +spike-ins or batch information (or both) must be supplied. If only batch +information is provided there must be at least two batches. See +\code{\link[BASiCS]{BASiCS_MCMC}} for details. +} +\examples{ +\dontrun{ +data("sc_example_counts") +spike.info <- data.frame(Name = rownames(sc_example_counts)[1:10], + Input = rnorm(10, 500, 200), + stringsAsFactors = FALSE) +params <- BASiCSEstimate(sc_example_counts[1:50, 1:20], + spike.info) +params +} +} diff --git a/man/BASiCSParams.Rd b/man/BASiCSParams.Rd new file mode 100644 index 0000000000000000000000000000000000000000..0bfb09a000d4d203dedf0eb6e7f73e5161080802 --- /dev/null +++ b/man/BASiCSParams.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClasses.R +\docType{class} +\name{BASiCSParams} +\alias{BASiCSParams} +\alias{BASiCSParams-class} +\title{The BASiCSParams class} +\description{ +S4 class that holds parameters for the BASiCS simulation. +} +\section{Parameters}{ + + +The BASiCS simulation uses the following parameters: +\describe{ + \item{\code{nGenes}}{The number of genes to simulate.} + \item{\code{nCells}}{The number of cells to simulate.} + \item{\code{[seed]}}{Seed to use for generating random numbers.} + \item{\emph{Batch parameters}}{ + \describe{ + \item{\code{nBatches}}{Number of batches to simulate.} + \item{\code{batchCells}}{Number of cells in each batch.} + } + } + \item{\emph{Gene parameters}}{ + \describe{ + \item{\code{gene.params}}{A \code{data.frame} containing gene + parameters with two coloumns: \code{Mean} (mean expression for + each biological gene) and \code{Delta} (cell-to-cell + heterogeneity for each biological gene).} + } + } + \item{\emph{Spike-in parameters}}{ + \describe{ + \item{\code{nSpikes}}{The number of spike-ins to simulate.} + \item{\code{spike.means}}{Input molecules for each spike-in.} + } + } + \item{\emph{Cell parameters}}{ + \describe{ + \item{\code{cell.params}}{A \code{data.frame} containing gene + parameters with two coloumns: \code{Phi} (mRNA content factor for + each cell, scaled to sum to the number of cells in each batch) + and \code{S} (capture efficient for each cell).} + } + } + \item{\emph{Variability parameters}}{ + \describe{ + \item{\code{theta}}{Technical variability parameter for each + batch.} + } + } +} + +The parameters not shown in brackets can be estimated from real data using +\code{\link{BASiCSEstimate}}. For details of the BASiCS simulation see +\code{\link{BASiCSSimulate}}. +} + diff --git a/man/BASiCSSimulate.Rd b/man/BASiCSSimulate.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cc92db054bd1f6f8259a9315258eb7719c4736cd --- /dev/null +++ b/man/BASiCSSimulate.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/BASiCS-simulate.R +\name{BASiCSSimulate} +\alias{BASiCSSimulate} +\title{BASiCS simulation} +\usage{ +BASiCSSimulate(params = newBASiCSParams(), verbose = TRUE, ...) +} +\arguments{ +\item{params}{BASiCSParams object containing simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} + +\item{...}{any additional parameter settings to override what is provided in +\code{params}.} +} +\value{ +SingleCellExperiment containing simulated counts +} +\description{ +Simulate counts using the BASiCS method. +} +\details{ +This function is just a wrapper around \code{\link[BASiCS]{BASiCS_Sim}} that +takes a \code{\link{BASiCSParams}}, runs the simulation then converts the +output to an \code{\link[scater]{SCESet}} object. See +\code{\link[BASiCS]{BASiCS_Sim}} for more details of how the simulation +works. +} +\examples{ +sim <- BASiCSSimulate() +} +\references{ +Vallejos CA, Marioni JC, Richardson S. BASiCS: Bayesian Analysis of +Single-Cell Sequencing data. PLoS Comput. Biol. (2015). + +Paper: \url{10.1371/journal.pcbi.1004333} + +Code: \url{https://github.com/catavallejos/BASiCS} +} diff --git a/man/Lun2Params.Rd b/man/Lun2Params.Rd index c49a92d153c5606e0cd32dc103a2f74fa7fcc66c..db97ba60ce719bf195a30172f9ae3b870fa2cb68 100644 --- a/man/Lun2Params.Rd +++ b/man/Lun2Params.Rd @@ -6,7 +6,7 @@ \alias{Lun2Params-class} \title{The Lun2Params class} \description{ -S4 class that holds parameters for the Lun simulation. +S4 class that holds parameters for the Lun2 simulation. } \section{Parameters}{ diff --git a/man/MFAParams.Rd b/man/MFAParams.Rd index 98513986450613ef64bad9e52401da3085e1c54c..10671a56d85822bb5a40edb87d8f502e9437b023 100644 --- a/man/MFAParams.Rd +++ b/man/MFAParams.Rd @@ -12,7 +12,6 @@ S4 class that holds parameters for the mfa simulation. The mfa simulation uses the following parameters: - \describe{ \item{\code{nGenes}}{The number of genes to simulate.} \item{\code{nCells}}{The number of cells to simulate.} diff --git a/man/expandParams.Rd b/man/expandParams.Rd index 5d54fc3e122f3566cf5698fde2e0e9a7a724f529..c6f83797cdbadf245f9e033ce8b25c4d1321a748 100644 --- a/man/expandParams.Rd +++ b/man/expandParams.Rd @@ -1,15 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/LunParams-methods.R, -% R/SplatParams-methods.R +% Please edit documentation in R/AllGenerics.R, R/BASiCSParams-methods.R, +% R/LunParams-methods.R, R/SplatParams-methods.R \docType{methods} \name{expandParams} \alias{expandParams} +\alias{expandParams,BASiCSParams-method} \alias{expandParams,LunParams-method} \alias{expandParams,SplatParams-method} \title{Expand parameters} \usage{ expandParams(object, ...) +\S4method{expandParams}{BASiCSParams}(object) + \S4method{expandParams}{LunParams}(object) \S4method{expandParams}{SplatParams}(object) diff --git a/man/newParams.Rd b/man/newParams.Rd index bac473177a58a9f19db7e21ae2e75ce97a887d34..e8d75401ae5a00cd3e660850d49fda896940b376 100644 --- a/man/newParams.Rd +++ b/man/newParams.Rd @@ -1,10 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, -% R/LunParams-methods.R, R/MFAParams-methods.R, R/PhenoParams-methods.R, -% R/SCDDParams-methods.R, R/SimpleParams-methods.R, R/SplatParams-methods.R, -% R/ZINBParams-methods.R +% Please edit documentation in R/AllGenerics.R, R/BASiCSParams-methods.R, +% R/Lun2Params-methods.R, R/LunParams-methods.R, R/MFAParams-methods.R, +% R/PhenoParams-methods.R, R/SCDDParams-methods.R, R/SimpleParams-methods.R, +% R/SplatParams-methods.R, R/ZINBParams-methods.R \name{newParams} \alias{newParams} +\alias{newBASiCSParams} \alias{newLun2Params} \alias{newLunParams} \alias{newMFAParams} @@ -15,6 +16,8 @@ \alias{newZINBParams} \title{New Params} \usage{ +newBASiCSParams(...) + newLun2Params(...) newLunParams(...) diff --git a/man/setParam.Rd b/man/setParam.Rd index 3958a594224ee6318da204d9d4afa71715a82727..378560f36fab9d73f53da98db53106e2d4f0eb6f 100644 --- a/man/setParam.Rd +++ b/man/setParam.Rd @@ -1,10 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, -% R/LunParams-methods.R, R/Params-methods.R, R/PhenoParams-methods.R, -% R/SCDDParams-methods.R, R/SplatParams-methods.R, R/ZINBParams-methods.R +% Please edit documentation in R/AllGenerics.R, R/BASiCSParams-methods.R, +% R/Lun2Params-methods.R, R/LunParams-methods.R, R/Params-methods.R, +% R/PhenoParams-methods.R, R/SCDDParams-methods.R, R/SplatParams-methods.R, +% R/ZINBParams-methods.R \docType{methods} \name{setParam} \alias{setParam} +\alias{setParam,BASiCSParams-method} \alias{setParam,Lun2Params-method} \alias{setParam,LunParams-method} \alias{setParam,Params-method} @@ -16,6 +18,8 @@ \usage{ setParam(object, name, value) +\S4method{setParam}{BASiCSParams}(object, name, value) + \S4method{setParam}{Lun2Params}(object, name, value) \S4method{setParam}{LunParams}(object, name, value) diff --git a/man/zinbEstimate.Rd b/man/zinbEstimate.Rd index 84956c9d532364f5793ff5e530328f17abcc2c7c..7d819e37f7a930b0ee15d1991db1dc406c6396ae 100644 --- a/man/zinbEstimate.Rd +++ b/man/zinbEstimate.Rd @@ -61,7 +61,9 @@ the fitted model and inserts it into a \code{\link{ZINBParams}} object. See \code{\link[zinbwave]{zinbFit}} for details of the estimation procedure. } \examples{ +\dontrun{ data("sc_example_counts") params <- zinbEstimate(sc_example_counts) params } +} diff --git a/tests/testthat/test-BASiCSParams.R b/tests/testthat/test-BASiCSParams.R new file mode 100644 index 0000000000000000000000000000000000000000..541abc15815caaa54b6f0ec4b5fff2c35e158e7b --- /dev/null +++ b/tests/testthat/test-BASiCSParams.R @@ -0,0 +1,27 @@ +context("BASiCSParams") + +test_that("gene.params checks work", { + params <- newBASiCSParams() + expect_error(setParam(params, "gene.params", data.frame(A = 1, B = 1)), + "gene.params: Incorrect column names") + expect_error(setParam(params, "gene.params", + data.frame(Mean = 1, Disp = "a")), + "gene.params: May only contain the following types: numeric") +}) + +test_that("cell.params checks work", { + params <- newBASiCSParams() + expect_error(setParam(params, "cell.params", data.frame(A = 1, B = 1)), + "cell.params: Incorrect column names") + expect_error(setParam(params, "cell.params", + data.frame(Phi = 1, S = "a")), + "cell.params: May only contain the following types: numeric") +}) + +test_that("nBatches checks work", { + params <- newBASiCSParams() + expect_error(setParam(params, "nCells", 1), + "nCells cannot be set directly, set batchCells instead") + expect_error(setParam(params, "nBatches", 1), + "nBatches cannot be set directly, set batchCells instead") +})