diff --git a/DESCRIPTION b/DESCRIPTION index 5b99b77dab69425790e5ae6b839266d44d0a6f68..e4ed84ef585cb45400a7f4a6d4ccf7c964637570 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 0.99.12 -Date: 2017-03-22 +Version: 0.99.13 +Date: 2017-03-25 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index c00aebb4c71329718aad6b095bda5f76c2abe6de..2fac85b2ceab64fb95336e95c5c64a2a2688a917 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,11 +50,11 @@ importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) importFrom(checkmate,checkCharacter) importFrom(checkmate,checkClass) +importFrom(checkmate,checkDataFrame) importFrom(checkmate,checkFactor) importFrom(checkmate,checkFlag) importFrom(checkmate,checkInt) importFrom(checkmate,checkIntegerish) -importFrom(checkmate,checkLogical) importFrom(checkmate,checkNumber) importFrom(checkmate,checkNumeric) importFrom(ggplot2,aes_string) diff --git a/NEWS.md b/NEWS.md index 308481dee215ae38435b069d9ff3024d2b365467..124ce57ccd9cde8fe998b4341fb1d2feeb1fca9d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# 0.99.13 + +* Modify how Lun2Params stores gene paramters to use data.frame +* Move sampling of genes/cells to lun2Simulate +* Return to old Lun2 nGenes estimate + # 0.99.12 * Add diffSCESets function diff --git a/R/AllClasses.R b/R/AllClasses.R index 568b391450cd5124b83af544bf93130f2c20ea1c..508407726476615522391240b783731d90c9f802 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -294,6 +294,17 @@ setClass("LunParams", #' \item{\code{nGenes}}{The number of genes to simulate.} #' \item{\code{nCells}}{The number of cells to simulate.} #' \item{\code{[seed]}}{Seed to use for generating random numbers.} +#' \item{\emph{Gene parameters}}{ +#' \describe{ +#' \item{\code{gene.params}}{A \code{data.frame} containing gene +#' parameters with two coloumns: \code{Mean} (mean expression for +#' each gene) and \code{Disp} (dispersion for each gene).} +#' \item{\code{zi.params}}{A \code{data.frame} containing +#' zero-inflated gene parameters with three coloumns: \code{Mean} +#' (mean expression for each gene), \code{Disp} (dispersion for +#' each, gene), and \code{Prop} (zero proportion for each gene).} +#' } +#' } #' \item{\code{[nPlates]}}{The number of plates to simulate.} #' \item{\emph{Plate parameters}}{ #' \describe{ @@ -304,15 +315,6 @@ setClass("LunParams", #' \item{\code{plate.var}}{Plate effect variance.} #' } #' } -#' \item{\emph{Gene parameters}}{ -#' \describe{ -#' \item{\code{gene.means}}{Mean expression for each gene.} -#' \item{\code{gene.disps}}{Dispersion for each gene.} -#' \item{\code{gene.ziMeans}}{Zero-inflated gene means.} -#' \item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.} -#' \item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.} -#' } -#' } #' \item{\emph{Cell parameters}}{ #' \describe{ #' \item{\code{cell.plates}}{Factor giving the plate that each cell @@ -346,11 +348,8 @@ setClass("Lun2Params", plate.ingroup = "character", plate.mod = "numeric", plate.var = "numeric", - gene.means = "numeric", - gene.disps = "numeric", - gene.ziMeans = "numeric", - gene.ziDisps = "numeric", - gene.ziProps = "numeric", + gene.params = "data.frame", + zi.params = "data.frame", cell.plates = "numeric", cell.libSizes = "numeric", cell.libMod = "numeric", @@ -361,11 +360,13 @@ setClass("Lun2Params", plate.ingroup = "1", plate.mod = 1, plate.var = 14, - gene.means = rep(3.2, 10000), - gene.disps = rep(0.03, 10000), - gene.ziMeans = rep(1.6, 10000), - gene.ziDisps = rep(0.1, 10000), - gene.ziProps = rep(2.3e-6, 10000), + gene.params = data.frame(Mean = rep(3.2, 10000), + Disp = rep(0.03, 10000) + ), + zi.params = data.frame(Mean = rep(1.6, 10000), + Disp = rep(0.1, 10000), + Prop = rep(2.3e-6, 10000) + ), cell.libSizes = rep(70000, 100), cell.libMod = 1, de.nGenes = 0, diff --git a/R/Lun2Params-methods.R b/R/Lun2Params-methods.R index b655884d125b6a7ef1939ceaea5a6274d68a02e1..3e583999c5bbcf786ff81a5506ebc8ce7e29be33 100644 --- a/R/Lun2Params-methods.R +++ b/R/Lun2Params-methods.R @@ -9,7 +9,7 @@ newLun2Params <- function(...) { return(params) } -#' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical +#' @importFrom checkmate checkInt checkNumber checkNumeric checkDataFrame #' checkCharacter checkFactor setValidity("Lun2Params", function(object) { @@ -25,23 +25,30 @@ setValidity("Lun2Params", function(object) { plate.ingroup = checkCharacter(v$plate.ingroup, min.len = 1), plate.mod = checkNumber(v$plate.mod, lower = 0), plate.var = checkNumber(v$plate.var, lower = 0), - gene.means = checkNumeric(v$gene.means, lower = 0, - len = nGenes), - gene.disps = checkNumeric(v$gene.disps, lower = 0, - len = nGenes), - gene.ziMeans = checkNumeric(v$gene.ziMeans, lower = 0, - len = nGenes), - gene.ziDisps = checkNumeric(v$gene.ziDisps, lower = 0, - len = nGenes), - gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0, - len = nGenes), + gene.params = checkDataFrame(v$gene.params, + types = "numeric", + any.missing = FALSE, + min.rows = 1, ncols = 2), + zi.params = checkDataFrame(v$zi.params, + types = "numeric", + any.missing = FALSE, + min.rows = 1, ncols = 3), cell.plates = checkFactor(v$cell.plates, len = nCells), cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0, - len = nCells), + min.len = 1, any.missing = FALSE, + finite = TRUE), cell.libMod = checkNumber(v$cell.libMod, lower = 0), de.nGene = checkInt(v$de.nGenes, lower = 0), de.fc = checkNumber(v$de.fc, lower = 0)) + if (!all(colnames(v$gene.params) == c("Mean", "Disp"))) { + checks <- c(checks, gene.params = "Incorrect column names") + } + + if (!all(colnames(v$zi.params) == c("Mean", "Disp", "Prop"))) { + checks <- c(checks, gene.params = "Incorrect column names") + } + if (all(checks == TRUE)) { valid <- TRUE } else { @@ -62,36 +69,11 @@ setMethod("setParam", "Lun2Params", function(object, name, value) { } if (name == "cell.plates") { - old.nCells <- getParam(object, "nCells") object <- setParamUnchecked(object, "nCells", length(value)) object <- setParamUnchecked(object, "nPlates", length(unique(value))) - if (length(value) != old.nCells) { - warning("nCells has been changed. cell.libSizes will be sampled ", - "to length nCells") - selected <- sample(seq_len(old.nCells), length(value), - replace = TRUE) - old.libSizes <- getParam(object, "cell.libSizes") - object <- setParamUnchecked(object, "cell.libSizes", - old.libSizes[selected]) - } value <- factor(value) } - if (name == "nGenes") { - old.nGenes <- getParam(object, "nGenes") - if (value != old.nGenes) { - warning("nGenes has been changed. Gene parameter vectors will be ", - "sampled to length new nGenes.") - selected <- sample(seq_len(old.nGenes), size = value, - replace = TRUE) - for (parameter in grep("gene", slotNames(object), value = TRUE)) { - old.value <- getParam(object, parameter) - object <- setParamUnchecked(object, parameter, - old.value[selected]) - } - } - } - object <- callNextMethod() return(object) @@ -102,11 +84,6 @@ setMethod("show", "Lun2Params", function(object) { pp <- list("Plates:" = c("[Number]" = "nPlates", "[Modifier]" = "plate.mod", "(Variance)" = "plate.var"), - "Genes:" = c("(Means)" = "gene.means", - "(Dispersions)" = "gene.disps", - "(ZI Means)" = "gene.ziMeans", - "(ZI Disps)" = "gene.ziDisps", - "(ZI Props)" = "gene.ziProps"), "Cells:" = c("[Plates]" = "cell.plates", "(Library Sizes)" = "cell.libSizes", "[Lib Size Mod]" = "cell.libMod"), @@ -114,5 +91,18 @@ setMethod("show", "Lun2Params", function(object) { "[Fold change]" = "de.fc")) callNextMethod() + + gene.params <- getParam(object, "gene.params") + zi.params <- getParam(object, "zi.params") + cat("Genes:", "\n") + cat("(Params)", "\n") + cat("data.frame with", dim(gene.params)[1], "features\n") + print(head(gene.params, n = 3)) + cat(" ... ...\n") + cat("(ZI Params)", "\n") + cat("data.frame with", dim(zi.params)[1], "features\n") + print(head(zi.params, n = 3)) + cat(" ... ... ...\n\n") + showPP(object, pp) }) diff --git a/R/lun2-estimate.R b/R/lun2-estimate.R index 6a5f53009fe549522faabb49f478b03430fc0849..ffd4155c3c480df72d7a34315b57cabfcf707838 100644 --- a/R/lun2-estimate.R +++ b/R/lun2-estimate.R @@ -71,7 +71,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), progress <- progress && verbose checkmate::assertClass(params, "Lun2Params") - checkmate::assertInt(min.size, lower = 20, upper = length(plates)) + checkmate::assertInt(min.size, lower = 1, upper = length(plates)) checkmate::assertIntegerish(plates, len = ncol(counts)) if (length(unique(plates)) < 2) { @@ -185,12 +185,13 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop)) - params <- setParams(params, nGenes = nrow(counts), + params <- setParams(params, nGenes = length(logmeans), cell.plates = plates, plate.var = sigma2, - gene.means = exp(logmeans), - gene.disps = dge$tagwise.dispersion, - gene.ziMeans = zinb.mean, gene.ziDisps = zinb.disp, - gene.ziProps = zinb.prop, + gene.params = data.frame(Mean = exp(logmeans), + Disp = dge$tagwise.dispersion), + zi.params = data.frame(Mean = zinb.mean, + Disp = zinb.disp, + Prop = zinb.prop), cell.libSizes = dge$samples$lib.size) return(params) diff --git a/R/lun2-simulate.R b/R/lun2-simulate.R index 7a6fb58a4dd03d76c5201b69ee4bca2931d5dc1c..bc8d9b696ad8eeb0086c664ca8aa2028d281cd03 100644 --- a/R/lun2-simulate.R +++ b/R/lun2-simulate.R @@ -19,6 +19,13 @@ #' Library size factors are also applied and optionally a zero-inflated #' negative-binomial can be used. #' +#' If the number of genes to simulate differs from the number of provied gene +#' parameters or the number of cells to simulate differs from the number of +#' library sizes the relevant paramters will be sampled with a warning. This +#' allows any number of genes or cells to be simulated regardless of the +#' number in the dataset used in the estimation step but has the downside that +#' some genes or cells may be simulated multiple times. +#' #' @return SCESet containing simulated counts. #' #' @references @@ -52,17 +59,54 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE, plate.var <- getParam(params, "plate.var") / getParam(params, "plate.mod") lib.sizes <- getParam(params, "cell.libSizes") lib.mod <- getParam(params, "cell.libMod") + + # Sample lib.sizes if necessary + if (length(lib.sizes) != nCells) { + warning("Number of lib.sizes not equal to nCells. ", + "lib.sizes will be sampled.") + selected <- sample(length(lib.sizes), nCells, replace = TRUE) + libSizes <- lib.sizes[selected] + } + # Select gene parameters depending on model - if (!zinb) { - gene.means <- getParam(params, "gene.means") - gene.disps <- getParam(params, "gene.disps") + if (zinb) { + gene.params <- getParam(params, "zi.params") } else { - gene.means <- getParam(params, "gene.ziMeans") - gene.disps <- getParam(params, "gene.ziDisps") - gene.ziProps <- getParam(params, "gene.ziProps") + gene.params <- getParam(params, "gene.params") + } + + # Sample gene parameters if necessary + if (nrow(gene.params) != nGenes) { + warning("Number of gene parameters does not equal nGenes. ", + "Gene parameters will be sampled.") + selected <- sample(nrow(gene.params), nGenes, replace = TRUE) + gene.params <- gene.params[selected, ] } + + gene.means <- gene.params$Mean + gene.disps <- gene.params$Disp + if (zinb) { + gene.ziProps <- gene.params$Prop + } + de.nGenes <- getParam(params, "de.nGenes") + #if (name == "nGenes") { + # old.nGenes <- getParam(object, "nGenes") + # if (value != old.nGenes) { + # warning("nGenes has been changed. Gene parameter vectors will be ", + # "sampled to length new nGenes.") + # selected <- sample(seq_len(old.nGenes), size = value, + # replace = TRUE) + # for (parameter in grep("gene", slotNames(object), value = TRUE)) { + # old.value <- getParam(object, parameter) + # object <- setParamUnchecked(object, parameter, + # old.value[selected]) + # } + # } + #} + + # Set up objects to store intermediate values cell.names <- paste0("Cell", seq_len(nCells)) gene.names <- paste0("Gene", seq_len(nGenes)) diff --git a/man/Lun2Params.Rd b/man/Lun2Params.Rd index 8b648606bb0e7295adf8a52a0f8c98eee37a6de9..c49a92d153c5606e0cd32dc103a2f74fa7fcc66c 100644 --- a/man/Lun2Params.Rd +++ b/man/Lun2Params.Rd @@ -17,6 +17,17 @@ The Lun2 simulation uses the following parameters: \item{\code{nGenes}}{The number of genes to simulate.} \item{\code{nCells}}{The number of cells to simulate.} \item{\code{[seed]}}{Seed to use for generating random numbers.} + \item{\emph{Gene parameters}}{ + \describe{ + \item{\code{gene.params}}{A \code{data.frame} containing gene + parameters with two coloumns: \code{Mean} (mean expression for + each gene) and \code{Disp} (dispersion for each gene).} + \item{\code{zi.params}}{A \code{data.frame} containing + zero-inflated gene parameters with three coloumns: \code{Mean} + (mean expression for each gene), \code{Disp} (dispersion for + each, gene), and \code{Prop} (zero proportion for each gene).} + } + } \item{\code{[nPlates]}}{The number of plates to simulate.} \item{\emph{Plate parameters}}{ \describe{ @@ -27,15 +38,6 @@ The Lun2 simulation uses the following parameters: \item{\code{plate.var}}{Plate effect variance.} } } - \item{\emph{Gene parameters}}{ - \describe{ - \item{\code{gene.means}}{Mean expression for each gene.} - \item{\code{gene.disps}}{Dispersion for each gene.} - \item{\code{gene.ziMeans}}{Zero-inflated gene means.} - \item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.} - \item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.} - } - } \item{\emph{Cell parameters}}{ \describe{ \item{\code{cell.plates}}{Factor giving the plate that each cell diff --git a/man/lun2Simulate.Rd b/man/lun2Simulate.Rd index cd48f05ea48f44b93e348a046044c7724d17a48a..491ba6787afdfc9bf75f949d3680225f4f15b6e3 100644 --- a/man/lun2Simulate.Rd +++ b/man/lun2Simulate.Rd @@ -32,6 +32,13 @@ simulation is the addition of plate effects. Differential expression can be added between two groups of plates (an "ingroup" and all other plates). Library size factors are also applied and optionally a zero-inflated negative-binomial can be used. + +If the number of genes to simulate differs from the number of provied gene +parameters or the number of cells to simulate differs from the number of +library sizes the relevant paramters will be sampled with a warning. This +allows any number of genes or cells to be simulated regardless of the +number in the dataset used in the estimation step but has the downside that +some genes or cells may be simulated multiple times. } \examples{ sim <- lun2Simulate() diff --git a/tests/testthat/test-Lun2Params.R b/tests/testthat/test-Lun2Params.R index b27d2462211826f1a37644972d6e7744024f3172..5fb60621ad864a0f04ae7f1b2297f7a2b37dc167 100644 --- a/tests/testthat/test-Lun2Params.R +++ b/tests/testthat/test-Lun2Params.R @@ -8,16 +8,11 @@ test_that("nCells checks work", { "nPlates cannot be set directly, set cell.plates instead") }) -test_that("cell sampling works", { +test_that("gene.params checks work", { params <- newLun2Params() - expect_warning(setParam(params, "cell.plates", 1), - paste("nCells has been changed. cell.libSizes will be", - "sampled to length nCells")) + expect_error(setParam(params, "gene.params", data.frame(A = 1, B = 1)), + "gene.params: Incorrect column names") + expect_error(setParam(params, "gene.params", + data.frame(Mean = 1, Disp = "a")), + "gene.params: May only contain the following types: numeric") }) - -test_that("gene sampling works", { - params <- newLun2Params() - expect_warning(setParam(params, "nGenes", 1), - paste("nGenes has been changed. Gene parameter vectors will", - "be sampled to length new nGenes.")) -}) \ No newline at end of file diff --git a/tests/testthat/test-lun2-simulate.R b/tests/testthat/test-lun2-simulate.R index 60508ee35b5701f21881fd19c0d7ccb1df6ca081..93aec1512fb2bb5db583c41b2940747c94ebc740 100644 --- a/tests/testthat/test-lun2-simulate.R +++ b/tests/testthat/test-lun2-simulate.R @@ -4,4 +4,16 @@ test_that("Lun2 simulation output is valid", { expect_true(validObject(lun2Simulate())) expect_true(validObject(lun2Simulate(de.nGenes = 100))) expect_true(validObject(lun2Simulate(zinb = TRUE))) -}) \ No newline at end of file +}) + +test_that("Gene sampling works", { + expect_warning(lun2Simulate(nGenes = 10), + paste("Number of gene parameters does not equal nGenes.", + "Gene parameters will be sampled.")) +}) + +test_that("Cell sampling works", { + expect_warning(lun2Simulate(cell.plates = c(1, 1, 2)), + paste("Number of lib.sizes not equal to nCells.", + "lib.sizes will be sampled.")) +})