From 52c2e8eb9adf30fe79f490093eee04fe1e08245d Mon Sep 17 00:00:00 2001 From: Luke Zappia <luke.zappia@mcri.edu.au> Date: Sat, 25 Mar 2017 11:39:08 +0000 Subject: [PATCH] Merge branch 'master' into devel * master: Version 0.99.13 Run checks Update Lun2 documentation Modify Lun2Params to use data.frame From: Luke Zappia <lazappi@users.noreply.github.com> git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/splatter@127694 bc3139a8-67e5-0310-9ffc-ced21a209358 --- DESCRIPTION | 4 +- NAMESPACE | 2 +- NEWS.md | 6 +++ R/AllClasses.R | 39 +++++++-------- R/Lun2Params-methods.R | 74 +++++++++++++---------------- R/lun2-estimate.R | 13 ++--- R/lun2-simulate.R | 56 +++++++++++++++++++--- man/Lun2Params.Rd | 20 ++++---- man/lun2Simulate.Rd | 7 +++ tests/testthat/test-Lun2Params.R | 17 +++---- tests/testthat/test-lun2-simulate.R | 14 +++++- 11 files changed, 155 insertions(+), 97 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5b99b77..e4ed84e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 0.99.12 -Date: 2017-03-22 +Version: 0.99.13 +Date: 2017-03-25 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index c00aebb..2fac85b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,11 +50,11 @@ importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) importFrom(checkmate,checkCharacter) importFrom(checkmate,checkClass) +importFrom(checkmate,checkDataFrame) importFrom(checkmate,checkFactor) importFrom(checkmate,checkFlag) importFrom(checkmate,checkInt) importFrom(checkmate,checkIntegerish) -importFrom(checkmate,checkLogical) importFrom(checkmate,checkNumber) importFrom(checkmate,checkNumeric) importFrom(ggplot2,aes_string) diff --git a/NEWS.md b/NEWS.md index 308481d..124ce57 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# 0.99.13 + +* Modify how Lun2Params stores gene paramters to use data.frame +* Move sampling of genes/cells to lun2Simulate +* Return to old Lun2 nGenes estimate + # 0.99.12 * Add diffSCESets function diff --git a/R/AllClasses.R b/R/AllClasses.R index 568b391..5084077 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -294,6 +294,17 @@ setClass("LunParams", #' \item{\code{nGenes}}{The number of genes to simulate.} #' \item{\code{nCells}}{The number of cells to simulate.} #' \item{\code{[seed]}}{Seed to use for generating random numbers.} +#' \item{\emph{Gene parameters}}{ +#' \describe{ +#' \item{\code{gene.params}}{A \code{data.frame} containing gene +#' parameters with two coloumns: \code{Mean} (mean expression for +#' each gene) and \code{Disp} (dispersion for each gene).} +#' \item{\code{zi.params}}{A \code{data.frame} containing +#' zero-inflated gene parameters with three coloumns: \code{Mean} +#' (mean expression for each gene), \code{Disp} (dispersion for +#' each, gene), and \code{Prop} (zero proportion for each gene).} +#' } +#' } #' \item{\code{[nPlates]}}{The number of plates to simulate.} #' \item{\emph{Plate parameters}}{ #' \describe{ @@ -304,15 +315,6 @@ setClass("LunParams", #' \item{\code{plate.var}}{Plate effect variance.} #' } #' } -#' \item{\emph{Gene parameters}}{ -#' \describe{ -#' \item{\code{gene.means}}{Mean expression for each gene.} -#' \item{\code{gene.disps}}{Dispersion for each gene.} -#' \item{\code{gene.ziMeans}}{Zero-inflated gene means.} -#' \item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.} -#' \item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.} -#' } -#' } #' \item{\emph{Cell parameters}}{ #' \describe{ #' \item{\code{cell.plates}}{Factor giving the plate that each cell @@ -346,11 +348,8 @@ setClass("Lun2Params", plate.ingroup = "character", plate.mod = "numeric", plate.var = "numeric", - gene.means = "numeric", - gene.disps = "numeric", - gene.ziMeans = "numeric", - gene.ziDisps = "numeric", - gene.ziProps = "numeric", + gene.params = "data.frame", + zi.params = "data.frame", cell.plates = "numeric", cell.libSizes = "numeric", cell.libMod = "numeric", @@ -361,11 +360,13 @@ setClass("Lun2Params", plate.ingroup = "1", plate.mod = 1, plate.var = 14, - gene.means = rep(3.2, 10000), - gene.disps = rep(0.03, 10000), - gene.ziMeans = rep(1.6, 10000), - gene.ziDisps = rep(0.1, 10000), - gene.ziProps = rep(2.3e-6, 10000), + gene.params = data.frame(Mean = rep(3.2, 10000), + Disp = rep(0.03, 10000) + ), + zi.params = data.frame(Mean = rep(1.6, 10000), + Disp = rep(0.1, 10000), + Prop = rep(2.3e-6, 10000) + ), cell.libSizes = rep(70000, 100), cell.libMod = 1, de.nGenes = 0, diff --git a/R/Lun2Params-methods.R b/R/Lun2Params-methods.R index b655884..3e58399 100644 --- a/R/Lun2Params-methods.R +++ b/R/Lun2Params-methods.R @@ -9,7 +9,7 @@ newLun2Params <- function(...) { return(params) } -#' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical +#' @importFrom checkmate checkInt checkNumber checkNumeric checkDataFrame #' checkCharacter checkFactor setValidity("Lun2Params", function(object) { @@ -25,23 +25,30 @@ setValidity("Lun2Params", function(object) { plate.ingroup = checkCharacter(v$plate.ingroup, min.len = 1), plate.mod = checkNumber(v$plate.mod, lower = 0), plate.var = checkNumber(v$plate.var, lower = 0), - gene.means = checkNumeric(v$gene.means, lower = 0, - len = nGenes), - gene.disps = checkNumeric(v$gene.disps, lower = 0, - len = nGenes), - gene.ziMeans = checkNumeric(v$gene.ziMeans, lower = 0, - len = nGenes), - gene.ziDisps = checkNumeric(v$gene.ziDisps, lower = 0, - len = nGenes), - gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0, - len = nGenes), + gene.params = checkDataFrame(v$gene.params, + types = "numeric", + any.missing = FALSE, + min.rows = 1, ncols = 2), + zi.params = checkDataFrame(v$zi.params, + types = "numeric", + any.missing = FALSE, + min.rows = 1, ncols = 3), cell.plates = checkFactor(v$cell.plates, len = nCells), cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0, - len = nCells), + min.len = 1, any.missing = FALSE, + finite = TRUE), cell.libMod = checkNumber(v$cell.libMod, lower = 0), de.nGene = checkInt(v$de.nGenes, lower = 0), de.fc = checkNumber(v$de.fc, lower = 0)) + if (!all(colnames(v$gene.params) == c("Mean", "Disp"))) { + checks <- c(checks, gene.params = "Incorrect column names") + } + + if (!all(colnames(v$zi.params) == c("Mean", "Disp", "Prop"))) { + checks <- c(checks, gene.params = "Incorrect column names") + } + if (all(checks == TRUE)) { valid <- TRUE } else { @@ -62,36 +69,11 @@ setMethod("setParam", "Lun2Params", function(object, name, value) { } if (name == "cell.plates") { - old.nCells <- getParam(object, "nCells") object <- setParamUnchecked(object, "nCells", length(value)) object <- setParamUnchecked(object, "nPlates", length(unique(value))) - if (length(value) != old.nCells) { - warning("nCells has been changed. cell.libSizes will be sampled ", - "to length nCells") - selected <- sample(seq_len(old.nCells), length(value), - replace = TRUE) - old.libSizes <- getParam(object, "cell.libSizes") - object <- setParamUnchecked(object, "cell.libSizes", - old.libSizes[selected]) - } value <- factor(value) } - if (name == "nGenes") { - old.nGenes <- getParam(object, "nGenes") - if (value != old.nGenes) { - warning("nGenes has been changed. Gene parameter vectors will be ", - "sampled to length new nGenes.") - selected <- sample(seq_len(old.nGenes), size = value, - replace = TRUE) - for (parameter in grep("gene", slotNames(object), value = TRUE)) { - old.value <- getParam(object, parameter) - object <- setParamUnchecked(object, parameter, - old.value[selected]) - } - } - } - object <- callNextMethod() return(object) @@ -102,11 +84,6 @@ setMethod("show", "Lun2Params", function(object) { pp <- list("Plates:" = c("[Number]" = "nPlates", "[Modifier]" = "plate.mod", "(Variance)" = "plate.var"), - "Genes:" = c("(Means)" = "gene.means", - "(Dispersions)" = "gene.disps", - "(ZI Means)" = "gene.ziMeans", - "(ZI Disps)" = "gene.ziDisps", - "(ZI Props)" = "gene.ziProps"), "Cells:" = c("[Plates]" = "cell.plates", "(Library Sizes)" = "cell.libSizes", "[Lib Size Mod]" = "cell.libMod"), @@ -114,5 +91,18 @@ setMethod("show", "Lun2Params", function(object) { "[Fold change]" = "de.fc")) callNextMethod() + + gene.params <- getParam(object, "gene.params") + zi.params <- getParam(object, "zi.params") + cat("Genes:", "\n") + cat("(Params)", "\n") + cat("data.frame with", dim(gene.params)[1], "features\n") + print(head(gene.params, n = 3)) + cat(" ... ...\n") + cat("(ZI Params)", "\n") + cat("data.frame with", dim(zi.params)[1], "features\n") + print(head(zi.params, n = 3)) + cat(" ... ... ...\n\n") + showPP(object, pp) }) diff --git a/R/lun2-estimate.R b/R/lun2-estimate.R index 6a5f530..ffd4155 100644 --- a/R/lun2-estimate.R +++ b/R/lun2-estimate.R @@ -71,7 +71,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), progress <- progress && verbose checkmate::assertClass(params, "Lun2Params") - checkmate::assertInt(min.size, lower = 20, upper = length(plates)) + checkmate::assertInt(min.size, lower = 1, upper = length(plates)) checkmate::assertIntegerish(plates, len = ncol(counts)) if (length(unique(plates)) < 2) { @@ -185,12 +185,13 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop)) - params <- setParams(params, nGenes = nrow(counts), + params <- setParams(params, nGenes = length(logmeans), cell.plates = plates, plate.var = sigma2, - gene.means = exp(logmeans), - gene.disps = dge$tagwise.dispersion, - gene.ziMeans = zinb.mean, gene.ziDisps = zinb.disp, - gene.ziProps = zinb.prop, + gene.params = data.frame(Mean = exp(logmeans), + Disp = dge$tagwise.dispersion), + zi.params = data.frame(Mean = zinb.mean, + Disp = zinb.disp, + Prop = zinb.prop), cell.libSizes = dge$samples$lib.size) return(params) diff --git a/R/lun2-simulate.R b/R/lun2-simulate.R index 7a6fb58..bc8d9b6 100644 --- a/R/lun2-simulate.R +++ b/R/lun2-simulate.R @@ -19,6 +19,13 @@ #' Library size factors are also applied and optionally a zero-inflated #' negative-binomial can be used. #' +#' If the number of genes to simulate differs from the number of provied gene +#' parameters or the number of cells to simulate differs from the number of +#' library sizes the relevant paramters will be sampled with a warning. This +#' allows any number of genes or cells to be simulated regardless of the +#' number in the dataset used in the estimation step but has the downside that +#' some genes or cells may be simulated multiple times. +#' #' @return SCESet containing simulated counts. #' #' @references @@ -52,17 +59,54 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE, plate.var <- getParam(params, "plate.var") / getParam(params, "plate.mod") lib.sizes <- getParam(params, "cell.libSizes") lib.mod <- getParam(params, "cell.libMod") + + # Sample lib.sizes if necessary + if (length(lib.sizes) != nCells) { + warning("Number of lib.sizes not equal to nCells. ", + "lib.sizes will be sampled.") + selected <- sample(length(lib.sizes), nCells, replace = TRUE) + libSizes <- lib.sizes[selected] + } + # Select gene parameters depending on model - if (!zinb) { - gene.means <- getParam(params, "gene.means") - gene.disps <- getParam(params, "gene.disps") + if (zinb) { + gene.params <- getParam(params, "zi.params") } else { - gene.means <- getParam(params, "gene.ziMeans") - gene.disps <- getParam(params, "gene.ziDisps") - gene.ziProps <- getParam(params, "gene.ziProps") + gene.params <- getParam(params, "gene.params") + } + + # Sample gene parameters if necessary + if (nrow(gene.params) != nGenes) { + warning("Number of gene parameters does not equal nGenes. ", + "Gene parameters will be sampled.") + selected <- sample(nrow(gene.params), nGenes, replace = TRUE) + gene.params <- gene.params[selected, ] } + + gene.means <- gene.params$Mean + gene.disps <- gene.params$Disp + if (zinb) { + gene.ziProps <- gene.params$Prop + } + de.nGenes <- getParam(params, "de.nGenes") + #if (name == "nGenes") { + # old.nGenes <- getParam(object, "nGenes") + # if (value != old.nGenes) { + # warning("nGenes has been changed. Gene parameter vectors will be ", + # "sampled to length new nGenes.") + # selected <- sample(seq_len(old.nGenes), size = value, + # replace = TRUE) + # for (parameter in grep("gene", slotNames(object), value = TRUE)) { + # old.value <- getParam(object, parameter) + # object <- setParamUnchecked(object, parameter, + # old.value[selected]) + # } + # } + #} + + # Set up objects to store intermediate values cell.names <- paste0("Cell", seq_len(nCells)) gene.names <- paste0("Gene", seq_len(nGenes)) diff --git a/man/Lun2Params.Rd b/man/Lun2Params.Rd index 8b64860..c49a92d 100644 --- a/man/Lun2Params.Rd +++ b/man/Lun2Params.Rd @@ -17,6 +17,17 @@ The Lun2 simulation uses the following parameters: \item{\code{nGenes}}{The number of genes to simulate.} \item{\code{nCells}}{The number of cells to simulate.} \item{\code{[seed]}}{Seed to use for generating random numbers.} + \item{\emph{Gene parameters}}{ + \describe{ + \item{\code{gene.params}}{A \code{data.frame} containing gene + parameters with two coloumns: \code{Mean} (mean expression for + each gene) and \code{Disp} (dispersion for each gene).} + \item{\code{zi.params}}{A \code{data.frame} containing + zero-inflated gene parameters with three coloumns: \code{Mean} + (mean expression for each gene), \code{Disp} (dispersion for + each, gene), and \code{Prop} (zero proportion for each gene).} + } + } \item{\code{[nPlates]}}{The number of plates to simulate.} \item{\emph{Plate parameters}}{ \describe{ @@ -27,15 +38,6 @@ The Lun2 simulation uses the following parameters: \item{\code{plate.var}}{Plate effect variance.} } } - \item{\emph{Gene parameters}}{ - \describe{ - \item{\code{gene.means}}{Mean expression for each gene.} - \item{\code{gene.disps}}{Dispersion for each gene.} - \item{\code{gene.ziMeans}}{Zero-inflated gene means.} - \item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.} - \item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.} - } - } \item{\emph{Cell parameters}}{ \describe{ \item{\code{cell.plates}}{Factor giving the plate that each cell diff --git a/man/lun2Simulate.Rd b/man/lun2Simulate.Rd index cd48f05..491ba67 100644 --- a/man/lun2Simulate.Rd +++ b/man/lun2Simulate.Rd @@ -32,6 +32,13 @@ simulation is the addition of plate effects. Differential expression can be added between two groups of plates (an "ingroup" and all other plates). Library size factors are also applied and optionally a zero-inflated negative-binomial can be used. + +If the number of genes to simulate differs from the number of provied gene +parameters or the number of cells to simulate differs from the number of +library sizes the relevant paramters will be sampled with a warning. This +allows any number of genes or cells to be simulated regardless of the +number in the dataset used in the estimation step but has the downside that +some genes or cells may be simulated multiple times. } \examples{ sim <- lun2Simulate() diff --git a/tests/testthat/test-Lun2Params.R b/tests/testthat/test-Lun2Params.R index b27d246..5fb6062 100644 --- a/tests/testthat/test-Lun2Params.R +++ b/tests/testthat/test-Lun2Params.R @@ -8,16 +8,11 @@ test_that("nCells checks work", { "nPlates cannot be set directly, set cell.plates instead") }) -test_that("cell sampling works", { +test_that("gene.params checks work", { params <- newLun2Params() - expect_warning(setParam(params, "cell.plates", 1), - paste("nCells has been changed. cell.libSizes will be", - "sampled to length nCells")) + expect_error(setParam(params, "gene.params", data.frame(A = 1, B = 1)), + "gene.params: Incorrect column names") + expect_error(setParam(params, "gene.params", + data.frame(Mean = 1, Disp = "a")), + "gene.params: May only contain the following types: numeric") }) - -test_that("gene sampling works", { - params <- newLun2Params() - expect_warning(setParam(params, "nGenes", 1), - paste("nGenes has been changed. Gene parameter vectors will", - "be sampled to length new nGenes.")) -}) \ No newline at end of file diff --git a/tests/testthat/test-lun2-simulate.R b/tests/testthat/test-lun2-simulate.R index 60508ee..93aec15 100644 --- a/tests/testthat/test-lun2-simulate.R +++ b/tests/testthat/test-lun2-simulate.R @@ -4,4 +4,16 @@ test_that("Lun2 simulation output is valid", { expect_true(validObject(lun2Simulate())) expect_true(validObject(lun2Simulate(de.nGenes = 100))) expect_true(validObject(lun2Simulate(zinb = TRUE))) -}) \ No newline at end of file +}) + +test_that("Gene sampling works", { + expect_warning(lun2Simulate(nGenes = 10), + paste("Number of gene parameters does not equal nGenes.", + "Gene parameters will be sampled.")) +}) + +test_that("Cell sampling works", { + expect_warning(lun2Simulate(cell.plates = c(1, 1, 2)), + paste("Number of lib.sizes not equal to nCells.", + "lib.sizes will be sampled.")) +}) -- GitLab