diff --git a/DESCRIPTION b/DESCRIPTION index 37176877b37d73708279f5ad00866223e512572d..4be8b4b0849846355db27205e005ee484f1fc90a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,7 +52,8 @@ Suggests: S4Vectors, scDD, scran, - mfa + mfa, + phenopath biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing, Software URL: https://github.com/Oshlack/splatter diff --git a/NAMESPACE b/NAMESPACE index 5017918a15815477a8c8c0152abb2c45d72d8349..e388c4739b1229bb1dd6937937beb9313068f8d5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,8 @@ S3method(lunEstimate,SingleCellExperiment) S3method(lunEstimate,matrix) S3method(mfaEstimate,SingleCellExperiment) S3method(mfaEstimate,matrix) +S3method(phenoEstimate,SingleCellExperiment) +S3method(phenoEstimate,matrix) S3method(scDDEstimate,SingleCellExperiment) S3method(scDDEstimate,default) S3method(scDDEstimate,matrix) @@ -31,9 +33,12 @@ export(mfaSimulate) export(newLun2Params) export(newLunParams) export(newMFAParams) +export(newPhenoParams) export(newSCDDParams) export(newSimpleParams) export(newSplatParams) +export(phenoEstimate) +export(phenoSimulate) export(scDDEstimate) export(scDDSimulate) export(setParam) @@ -49,6 +54,7 @@ export(summariseDiff) exportClasses(Lun2Params) exportClasses(LunParams) exportClasses(MFAParams) +exportClasses(PhenoParams) exportClasses(SCDDParams) exportClasses(SimpleParams) exportClasses(SplatParams) diff --git a/R/AllClasses.R b/R/AllClasses.R index 536efced17047cef1f4b74ba0cca485c5cf6f904..e9a131c926fd7e0cd2bedae14601b4b5a814c837 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -500,3 +500,43 @@ setClass("MFAParams", dropout.lambda = "numeric"), prototype = prototype(trans.prop = 0, zero.neg = TRUE, dropout.present = FALSE, dropout.lambda = 1)) + + +#' The PhenoParams class +#' +#' S4 class that holds parameters for the PhenoPath simulation. +#' +#' @section Parameters: +#' +#' The PhenoPath simulation uses the following parameters: +#' +#' \describe{ +#' \item{\code{nGenes}}{The number of genes to simulate.} +#' \item{\code{nCells}}{The number of cells to simulate.} +#' \item{\code{[seed]}}{Seed to use for generating random numbers.} +#' \item{\code{[n.de]}}{Number of genes to simulate from the differential +#' expression regime} +#' \item{\code{[n.pst]}}{Number of genes to simulate from the pseudotime +#' regime} +#' \item{\code{[n.pst.beta]}}{Number of genes to simulate from the +#' pseudotime + beta interactions regime} +#' \item{\code{[n.de.pst.beta]}}{Number of genes to simulate from the +#' differential expression + pseudotime + interactions regime} +#' } +#' +#' The parameters not shown in brackets can be estimated from real data using +#' \code{\link{phenoEstimate}}. For details of the PhenoPath simulation +#' see \code{\link{phenoSimulate}}. +#' +#' @name PhenoParams +#' @rdname PhenoParams +#' @aliases PhenoParams-class +#' @exportClass PhenoParams +setClass("PhenoParams", + contains = "Params", + slots = c(n.de = "numeric", + n.pst = "numeric", + n.pst.beta = "numeric", + n.de.pst.beta = "numeric"), + prototype = prototype(n.de = 2500, n.pst = 2500, n.pst.beta = 2500, + n.de.pst.beta = 2500)) diff --git a/R/PhenoParams-methods.R b/R/PhenoParams-methods.R new file mode 100644 index 0000000000000000000000000000000000000000..2151c52c6a877a3d7503040e55468f8159ff30f2 --- /dev/null +++ b/R/PhenoParams-methods.R @@ -0,0 +1,79 @@ +#' @rdname newParams +#' @importFrom methods new +#' @export +newPhenoParams <- function(...) { + + if (!requireNamespace("phenopath", quietly = TRUE)) { + stop("The PhenoPath simulation requires the 'phenopath' package.") + } + + params <- new("PhenoParams") + params <- setParams(params, ...) + + return(params) +} + +setValidity("PhenoParams", function(object) { + + v <- getParams(object, slotNames(object)) + + checks <- c(nGenes = checkmate::checkInt(v$nGenes, lower = 1), + nCells = checkmate::checkInt(v$nCells, lower = 1), + n.de = checkmate::checkInt(v$n.de, lower = 0), + n.pst = checkmate::checkInt(v$n.pst, lower = 0), + n.pst.beta = checkmate::checkInt(v$n.pst.beta, lower = 0), + n.de.pst.beta = checkmate::checkInt(v$n.de.pst.beta, lower = 0), + seed = checkmate::checkInt(v$seed, lower = 0)) + + if (v$nGenes != (v$n.de + v$n.pst + v$n.pst.beta + v$n.de.pst.beta)) { + checks <- c(checks, + nGenes = paste("nGenes is not consistent with", + "n.de, n.pst, n.pst.beta, n.de.pst.beta")) + } + + if (all(checks == TRUE)) { + valid <- TRUE + } else { + valid <- checks[checks != TRUE] + valid <- paste(names(valid), valid, sep = ": ") + } + + return(valid) +}) + +#' @rdname setParam +setMethod("setParam", "PhenoParams", function(object, name, value) { + checkmate::assertString(name) + + if (name == "nGenes") { + stop(name, " cannot be set directly, set n.de, n.pst, n.pst.beta or ", + "n.de.pst.beta instead") + } + + nNames <- c("n.de", "n.pst", "n.pst.beta", "n.de.pst.beta") + if (name %in% nNames) { + checkmate::assertInt(value, lower = 0) + total <- value + for (nName in nNames) { + if (nName != name) { + total <- total + getParam(object, nName) + } + } + object <- setParamUnchecked(object, "nGenes", total) + } + + object <- callNextMethod() + + return(object) +}) + +setMethod("show", "PhenoParams", function(object) { + + pp <- list("Genes:" = c("[DE]" = "n.de", + "[PST]" = "n.pst", + "[PST + Beta]" = "n.pst.beta", + "[DE + PST + Beta]" = "n.de.pst.beta")) + + callNextMethod() + showPP(object, pp) +}) diff --git a/R/listSims.R b/R/listSims.R index 51fb3ff91491b274ff28f5e82d0187bb523c63ba..a7085b7521b8ddffaaa9fe495ea3731dea586130 100644 --- a/R/listSims.R +++ b/R/listSims.R @@ -52,7 +52,11 @@ listSims <- function(print = TRUE) { "kieranrcampbell/mfa", "The mfa simulation produces a bifurcating pseudotime trajectory. This can optionally include genes with transient - changes in expression and added dropout.")) + changes in expression and added dropout."), + c("PhenoPath", "pheno", "10.1101/159913", + "kieranrcampbell/phenopath", + "The PhenoPath simulation produces a pseudotime trajectory + with different types of genes.")) sims.table <- data.frame(Name = rep(NA, length(sims)), Prefix = rep(NA, length(sims)), diff --git a/R/pheno-estimate.R b/R/pheno-estimate.R new file mode 100644 index 0000000000000000000000000000000000000000..61cf4fc4541fa47916b120a9462338c3d1fa12a9 --- /dev/null +++ b/R/pheno-estimate.R @@ -0,0 +1,49 @@ +#' Estimate PhenoPath simulation parameters +#' +#' Estimate simulation parameters for the PhenoPath simulation from a real +#' dataset. +#' +#' @param counts either a counts matrix or an SingleCellExperiment object +#' containing count data to estimate parameters from. +#' @param params PhenoParams object to store estimated values in. +#' +#' @details +#' The \code{nGenes} and \code{nCells} parameters are taken from the size of the +#' input data. The total number of genes is evenly divided into the four types. +#' See \code{\link{PhenoParams}} for more details on the parameters. +#' +#' @return PhenoParams object containing the estimated parameters. +#' +#' @examples +#' data("sc_example_counts") +#' params <- phenoEstimate(sc_example_counts) +#' params +#' @export +phenoEstimate <- function(counts, params = newPhenoParams()) { + UseMethod("phenoEstimate") +} + +#' @rdname phenoEstimate +#' @export +phenoEstimate.SingleCellExperiment <- function(counts, + params = newPhenoParams()) { + counts <- BiocGenerics::counts(counts) + phenoEstimate(counts, params) +} + +#' @rdname phenoEstimate +#' @export +phenoEstimate.matrix <- function(counts, params = newPhenoParams()) { + + checkmate::assertClass(params, "PhenoParams") + + nGenes <- nrow(counts) + quarter <- floor(nGenes / 4) + + params <- setParams(params, nCells = ncol(counts), + n.de = nGenes - 3 * quarter, + n.pst = quarter, n.pst.beta = quarter, + n.de.pst.beta = quarter) + + return(params) +} diff --git a/R/pheno-simulate.R b/R/pheno-simulate.R new file mode 100644 index 0000000000000000000000000000000000000000..70042e49966ddba3d13947cce5aaeb4e16e94b37 --- /dev/null +++ b/R/pheno-simulate.R @@ -0,0 +1,84 @@ +#' PhenoPath simulation +#' +#' Simulate counts from a pseudotime trajectory using the PhenoPath method. +#' +#' @param params PhenoParams object containing simulation parameters. +#' @param verbose logical. Whether to print progress messages +#' @param ... any additional parameter settings to override what is provided in +#' \code{params}. +#' +#' @details +#' This function is just a wrapper around +#' \code{\link[phenopath]{simulate_phenopath}} that takes a +#' \code{\link{PhenoParams}}, runs the simulation then converts the +#' output to a \code{\link[SingleCellExperiment]{SingleCellExperiment}} object. +#' See \code{\link[phenopath]{simulate_phenopath}} and the PhenoPath paper for +#' more details about how the simulation works. +#' +#' @return SingleCellExperiment containing simulated counts +#' +#' @references +#' Campbell K, Yau C. Uncovering genomic trajectories with heterogeneous genetic +#' and environmental backgrounds across single-cells and populations. bioRxiv +#' (2017). +#' +#' Paper: \url{10.1101/159913} +#' +#' Code: \url{https://github.com/kieranrcampbell/phenopath} +#' +#' @examples +#' sim <- phenoSimulate() +#' +#' @export +#' @importFrom SingleCellExperiment SingleCellExperiment +phenoSimulate <- function(params = newPhenoParams(), verbose = TRUE, ...) { + + checkmate::assertClass(params, "PhenoParams") + params <- setParams(params, ...) + + # Set random seed + seed <- getParam(params, "seed") + set.seed(seed) + + # Get the parameters we are going to use + nCells <- getParam(params, "nCells") + nGenes <- getParam(params, "nGenes") + n.de <- getParam(params, "n.de") + n.pst <- getParam(params, "n.pst") + n.pst.beta <- getParam(params, "n.pst.beta") + n.de.pst.beta <- getParam(params, "n.de.pst.beta") + + if (verbose) {message("Simulating counts...")} + pheno.sim <- phenopath::simulate_phenopath(N = nCells, + G_de = n.de, + G_pst = n.pst, + G_pst_beta = n.pst.beta, + G_de_pst_beta = n.de.pst.beta) + + if (verbose) {message("Creating final dataset...")} + cell.names <- paste0("Cell", seq_len(nCells)) + gene.names <- paste0("Gene", seq_len(nGenes)) + + counts <- t(pheno.sim$y) + rownames(counts) <- gene.names + colnames(counts) <- cell.names + + cells <- data.frame(Cell = cell.names, + Covariate = pheno.sim$x, + Pseudotime = pheno.sim$z) + rownames(cells) <- cell.names + + features <- data.frame(Gene = gene.names, + Alpha = pheno.sim$parameters$alpha, + Lambda = pheno.sim$parameters$lambda, + Beta = pheno.sim$parameters$beta, + Regime = pheno.sim$parameters$regime) + rownames(features) <- gene.names + + sim <- SingleCellExperiment(assays = list(counts = counts), + rowData = features, + colData = cells, + metadata = list(params = params)) + + return(sim) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index cd4c1f15af8c7b76d2e5256036d63835e03d3ba6..0bf1df13f10d6fe928dac469a64c352e92f5c6e7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -15,6 +15,7 @@ reference: - '`MFAParams`' - '`newParams`' - '`Params`' + - '`phenoParams`' - '`SCDDParams`' - '`SimpleParams`' - '`SplatParams`' @@ -26,6 +27,7 @@ reference: - '`lun2Estimate`' - '`lunEstimate`' - '`mfaEstimate`' + - '`phenoEstimate`' - '`scDDEstimate`' - '`simpleEstimate`' - '`splatEstBCV`' @@ -40,6 +42,7 @@ reference: - '`lun2Simulate`' - '`lunSimulate`' - '`mfaSimulate`' + - '`phenoEstimate`' - '`scDDSimulate`' - '`simpleSimulate`' - '`splatSimBatchCellMeans`' diff --git a/man/PhenoParams.Rd b/man/PhenoParams.Rd new file mode 100644 index 0000000000000000000000000000000000000000..b1e135f28ffa19cc6fa11e950bf9295a2d443f6a --- /dev/null +++ b/man/PhenoParams.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllClasses.R +\docType{class} +\name{PhenoParams} +\alias{PhenoParams} +\alias{PhenoParams-class} +\title{The PhenoParams class} +\description{ +S4 class that holds parameters for the PhenoPath simulation. +} +\section{Parameters}{ + + +The PhenoPath simulation uses the following parameters: + +\describe{ + \item{\code{nGenes}}{The number of genes to simulate.} + \item{\code{nCells}}{The number of cells to simulate.} + \item{\code{[seed]}}{Seed to use for generating random numbers.} + \item{\code{[n.de]}}{Number of genes to simulate from the differential + expression regime} + \item{\code{[n.pst]}}{Number of genes to simulate from the pseudotime + regime} + \item{\code{[n.pst.beta]}}{Number of genes to simulate from the + pseudotime + beta interactions regime} + \item{\code{[n.de.pst.beta]}}{Number of genes to simulate from the + differential expression + pseudotime + interactions regime} +} + +The parameters not shown in brackets can be estimated from real data using +\code{\link{phenoEstimate}}. For details of the PhenoPath simulation +see \code{\link{phenoSimulate}}. +} + diff --git a/man/newParams.Rd b/man/newParams.Rd index de3e6b85de30198d6b9b53158119455e52e40d50..106202ac305422fbac9a9b60c681fac9d0963685 100644 --- a/man/newParams.Rd +++ b/man/newParams.Rd @@ -1,12 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, -% R/LunParams-methods.R, R/MFAParams-methods.R, R/SCDDParams-methods.R, -% R/SimpleParams-methods.R, R/SplatParams-methods.R +% R/LunParams-methods.R, R/MFAParams-methods.R, R/PhenoParams-methods.R, +% R/SCDDParams-methods.R, R/SimpleParams-methods.R, R/SplatParams-methods.R \name{newParams} \alias{newParams} \alias{newLun2Params} \alias{newLunParams} \alias{newMFAParams} +\alias{newPhenoParams} \alias{newSCDDParams} \alias{newSimpleParams} \alias{newSplatParams} @@ -18,6 +19,8 @@ newLunParams(...) newMFAParams(...) +newPhenoParams(...) + newSCDDParams(...) newSimpleParams(...) diff --git a/man/phenoEstimate.Rd b/man/phenoEstimate.Rd new file mode 100644 index 0000000000000000000000000000000000000000..5b698238acbc48c4a606cc9fd7a2c3b550936311 --- /dev/null +++ b/man/phenoEstimate.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pheno-estimate.R +\name{phenoEstimate} +\alias{phenoEstimate} +\alias{phenoEstimate.SingleCellExperiment} +\alias{phenoEstimate.matrix} +\title{Estimate PhenoPath simulation parameters} +\usage{ +phenoEstimate(counts, params = newPhenoParams()) + +\method{phenoEstimate}{SingleCellExperiment}(counts, + params = newPhenoParams()) + +\method{phenoEstimate}{matrix}(counts, params = newPhenoParams()) +} +\arguments{ +\item{counts}{either a counts matrix or an SingleCellExperiment object +containing count data to estimate parameters from.} + +\item{params}{PhenoParams object to store estimated values in.} +} +\value{ +PhenoParams object containing the estimated parameters. +} +\description{ +Estimate simulation parameters for the PhenoPath simulation from a real +dataset. +} +\details{ +The \code{nGenes} and \code{nCells} parameters are taken from the size of the +input data. The total number of genes is evenly divided into the four types. +See \code{\link{PhenoParams}} for more details on the parameters. +} +\examples{ +data("sc_example_counts") +params <- phenoEstimate(sc_example_counts) +params +} diff --git a/man/phenoSimulate.Rd b/man/phenoSimulate.Rd new file mode 100644 index 0000000000000000000000000000000000000000..2effe2029d624eccd684c90b8685b215704e7aa8 --- /dev/null +++ b/man/phenoSimulate.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pheno-simulate.R +\name{phenoSimulate} +\alias{phenoSimulate} +\title{PhenoPath simulation} +\usage{ +phenoSimulate(params = newPhenoParams(), verbose = TRUE, ...) +} +\arguments{ +\item{params}{PhenoParams object containing simulation parameters.} + +\item{verbose}{logical. Whether to print progress messages} + +\item{...}{any additional parameter settings to override what is provided in +\code{params}.} +} +\value{ +SingleCellExperiment containing simulated counts +} +\description{ +Simulate counts from a pseudotime trajectory using the PhenoPath method. +} +\details{ +This function is just a wrapper around +\code{\link[phenopath]{simulate_phenopath}} that takes a +\code{\link{PhenoParams}}, runs the simulation then converts the +output to a \code{\link[SingleCellExperiment]{SingleCellExperiment}} object. +See \code{\link[phenopath]{simulate_phenopath}} and the PhenoPath paper for +more details about how the simulation works. +} +\examples{ +sim <- phenoSimulate() + +} +\references{ +Campbell K, Yau C. Uncovering genomic trajectories with heterogeneous genetic +and environmental backgrounds across single-cells and populations. bioRxiv +(2017). + +Paper: \url{10.1101/159913} + +Code: \url{https://github.com/kieranrcampbell/phenopath} +} diff --git a/man/setParam.Rd b/man/setParam.Rd index 9c1ba5341613eb970da5a70d097a52fc521d0f60..ce8a32ca05e800826ca81b60da703c6c538966bd 100644 --- a/man/setParam.Rd +++ b/man/setParam.Rd @@ -1,13 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, -% R/LunParams-methods.R, R/Params-methods.R, R/SCDDParams-methods.R, -% R/SplatParams-methods.R +% R/LunParams-methods.R, R/Params-methods.R, R/PhenoParams-methods.R, +% R/SCDDParams-methods.R, R/SplatParams-methods.R \docType{methods} \name{setParam} \alias{setParam} \alias{setParam,Lun2Params-method} \alias{setParam,LunParams-method} \alias{setParam,Params-method} +\alias{setParam,PhenoParams-method} \alias{setParam,SCDDParams-method} \alias{setParam,SplatParams-method} \title{Set a parameter} @@ -20,6 +21,8 @@ setParam(object, name, value) \S4method{setParam}{Params}(object, name, value) +\S4method{setParam}{PhenoParams}(object, name, value) + \S4method{setParam}{SCDDParams}(object, name, value) \S4method{setParam}{SplatParams}(object, name, value) diff --git a/tests/testthat/test-PhenoParams.R b/tests/testthat/test-PhenoParams.R new file mode 100644 index 0000000000000000000000000000000000000000..535724ad0b4ddf631c7e9e10a4430a9be9d092d1 --- /dev/null +++ b/tests/testthat/test-PhenoParams.R @@ -0,0 +1,15 @@ +context("PhenoParams") + +test_that("constructor is valid", { + expect_true(validObject(newSCDDParams())) +}) + +test_that("nGenes checks work", { + params <- newPhenoParams() + expect_error(setParam(params, "nGenes", 1), + "nGenes cannot be set directly") + params <- setParam(params, "n.de", 0) + total <- getParam(params, "n.de") + getParam(params, "n.pst") + + getParam(params, "n.pst.beta") + getParam(params, "n.de.pst.beta") + expect_equal(getParam(params, "nGenes"), total) +}) diff --git a/tests/testthat/test-SplatParams.R b/tests/testthat/test-SplatParams.R index 88c32aa5b5cc01712952d601bc1ad7bb8505dc0f..208f0dfa24b672635f12709d4823b49cdaa3c7d7 100644 --- a/tests/testthat/test-SplatParams.R +++ b/tests/testthat/test-SplatParams.R @@ -20,9 +20,7 @@ test_that("path.from checks work", { params <- setParamUnchecked(params, "path.from", c(0, 1)) expect_silent(validObject(params)) params <- setParamUnchecked(params, "path.from", c(0, 3)) - expect_error(validObject(params), - paste('invalid class "SplatParams" object: path.from:', - "All elements must be <= 2")) + expect_error(validObject(params), "invalid class") params <- setParamUnchecked(params, "path.from", c(1, 0)) expect_error(validObject(params), "path cannot begin at itself") params <- newSplatParams() diff --git a/tests/testthat/test-pheno-simulate.R b/tests/testthat/test-pheno-simulate.R new file mode 100644 index 0000000000000000000000000000000000000000..f67a25e72e0ca8cd8ef82537f83a752ae23a429a --- /dev/null +++ b/tests/testthat/test-pheno-simulate.R @@ -0,0 +1,5 @@ +context("PhenoPath simulation") + +test_that("PhenoPath simulation output is valid", { + expect_true(validObject(phenoSimulate())) +})