Skip to content
Snippets Groups Projects
Commit d24dbb5c authored by l.zappia's avatar l.zappia
Browse files

Merge branch 'master' into devel

* master:
  Version 0.99.13
  Run checks
  Update Lun2 documentation
  Modify Lun2Params to use data.frame

From: Luke Zappia <lazappi@users.noreply.github.com>

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/splatter@127694 bc3139a8-67e5-0310-9ffc-ced21a209358
parent 04343ac0
No related branches found
No related tags found
No related merge requests found
Package: splatter
Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 0.99.12
Date: 2017-03-22
Version: 0.99.13
Date: 2017-03-25
Author: Luke Zappia
Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"),
......
......@@ -50,11 +50,11 @@ importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bplapply)
importFrom(checkmate,checkCharacter)
importFrom(checkmate,checkClass)
importFrom(checkmate,checkDataFrame)
importFrom(checkmate,checkFactor)
importFrom(checkmate,checkFlag)
importFrom(checkmate,checkInt)
importFrom(checkmate,checkIntegerish)
importFrom(checkmate,checkLogical)
importFrom(checkmate,checkNumber)
importFrom(checkmate,checkNumeric)
importFrom(ggplot2,aes_string)
......
# 0.99.13
* Modify how Lun2Params stores gene paramters to use data.frame
* Move sampling of genes/cells to lun2Simulate
* Return to old Lun2 nGenes estimate
# 0.99.12
* Add diffSCESets function
......
......@@ -294,6 +294,17 @@ setClass("LunParams",
#' \item{\code{nGenes}}{The number of genes to simulate.}
#' \item{\code{nCells}}{The number of cells to simulate.}
#' \item{\code{[seed]}}{Seed to use for generating random numbers.}
#' \item{\emph{Gene parameters}}{
#' \describe{
#' \item{\code{gene.params}}{A \code{data.frame} containing gene
#' parameters with two coloumns: \code{Mean} (mean expression for
#' each gene) and \code{Disp} (dispersion for each gene).}
#' \item{\code{zi.params}}{A \code{data.frame} containing
#' zero-inflated gene parameters with three coloumns: \code{Mean}
#' (mean expression for each gene), \code{Disp} (dispersion for
#' each, gene), and \code{Prop} (zero proportion for each gene).}
#' }
#' }
#' \item{\code{[nPlates]}}{The number of plates to simulate.}
#' \item{\emph{Plate parameters}}{
#' \describe{
......@@ -304,15 +315,6 @@ setClass("LunParams",
#' \item{\code{plate.var}}{Plate effect variance.}
#' }
#' }
#' \item{\emph{Gene parameters}}{
#' \describe{
#' \item{\code{gene.means}}{Mean expression for each gene.}
#' \item{\code{gene.disps}}{Dispersion for each gene.}
#' \item{\code{gene.ziMeans}}{Zero-inflated gene means.}
#' \item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.}
#' \item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.}
#' }
#' }
#' \item{\emph{Cell parameters}}{
#' \describe{
#' \item{\code{cell.plates}}{Factor giving the plate that each cell
......@@ -346,11 +348,8 @@ setClass("Lun2Params",
plate.ingroup = "character",
plate.mod = "numeric",
plate.var = "numeric",
gene.means = "numeric",
gene.disps = "numeric",
gene.ziMeans = "numeric",
gene.ziDisps = "numeric",
gene.ziProps = "numeric",
gene.params = "data.frame",
zi.params = "data.frame",
cell.plates = "numeric",
cell.libSizes = "numeric",
cell.libMod = "numeric",
......@@ -361,11 +360,13 @@ setClass("Lun2Params",
plate.ingroup = "1",
plate.mod = 1,
plate.var = 14,
gene.means = rep(3.2, 10000),
gene.disps = rep(0.03, 10000),
gene.ziMeans = rep(1.6, 10000),
gene.ziDisps = rep(0.1, 10000),
gene.ziProps = rep(2.3e-6, 10000),
gene.params = data.frame(Mean = rep(3.2, 10000),
Disp = rep(0.03, 10000)
),
zi.params = data.frame(Mean = rep(1.6, 10000),
Disp = rep(0.1, 10000),
Prop = rep(2.3e-6, 10000)
),
cell.libSizes = rep(70000, 100),
cell.libMod = 1,
de.nGenes = 0,
......
......@@ -9,7 +9,7 @@ newLun2Params <- function(...) {
return(params)
}
#' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical
#' @importFrom checkmate checkInt checkNumber checkNumeric checkDataFrame
#' checkCharacter checkFactor
setValidity("Lun2Params", function(object) {
......@@ -25,23 +25,30 @@ setValidity("Lun2Params", function(object) {
plate.ingroup = checkCharacter(v$plate.ingroup, min.len = 1),
plate.mod = checkNumber(v$plate.mod, lower = 0),
plate.var = checkNumber(v$plate.var, lower = 0),
gene.means = checkNumeric(v$gene.means, lower = 0,
len = nGenes),
gene.disps = checkNumeric(v$gene.disps, lower = 0,
len = nGenes),
gene.ziMeans = checkNumeric(v$gene.ziMeans, lower = 0,
len = nGenes),
gene.ziDisps = checkNumeric(v$gene.ziDisps, lower = 0,
len = nGenes),
gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0,
len = nGenes),
gene.params = checkDataFrame(v$gene.params,
types = "numeric",
any.missing = FALSE,
min.rows = 1, ncols = 2),
zi.params = checkDataFrame(v$zi.params,
types = "numeric",
any.missing = FALSE,
min.rows = 1, ncols = 3),
cell.plates = checkFactor(v$cell.plates, len = nCells),
cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0,
len = nCells),
min.len = 1, any.missing = FALSE,
finite = TRUE),
cell.libMod = checkNumber(v$cell.libMod, lower = 0),
de.nGene = checkInt(v$de.nGenes, lower = 0),
de.fc = checkNumber(v$de.fc, lower = 0))
if (!all(colnames(v$gene.params) == c("Mean", "Disp"))) {
checks <- c(checks, gene.params = "Incorrect column names")
}
if (!all(colnames(v$zi.params) == c("Mean", "Disp", "Prop"))) {
checks <- c(checks, gene.params = "Incorrect column names")
}
if (all(checks == TRUE)) {
valid <- TRUE
} else {
......@@ -62,36 +69,11 @@ setMethod("setParam", "Lun2Params", function(object, name, value) {
}
if (name == "cell.plates") {
old.nCells <- getParam(object, "nCells")
object <- setParamUnchecked(object, "nCells", length(value))
object <- setParamUnchecked(object, "nPlates", length(unique(value)))
if (length(value) != old.nCells) {
warning("nCells has been changed. cell.libSizes will be sampled ",
"to length nCells")
selected <- sample(seq_len(old.nCells), length(value),
replace = TRUE)
old.libSizes <- getParam(object, "cell.libSizes")
object <- setParamUnchecked(object, "cell.libSizes",
old.libSizes[selected])
}
value <- factor(value)
}
if (name == "nGenes") {
old.nGenes <- getParam(object, "nGenes")
if (value != old.nGenes) {
warning("nGenes has been changed. Gene parameter vectors will be ",
"sampled to length new nGenes.")
selected <- sample(seq_len(old.nGenes), size = value,
replace = TRUE)
for (parameter in grep("gene", slotNames(object), value = TRUE)) {
old.value <- getParam(object, parameter)
object <- setParamUnchecked(object, parameter,
old.value[selected])
}
}
}
object <- callNextMethod()
return(object)
......@@ -102,11 +84,6 @@ setMethod("show", "Lun2Params", function(object) {
pp <- list("Plates:" = c("[Number]" = "nPlates",
"[Modifier]" = "plate.mod",
"(Variance)" = "plate.var"),
"Genes:" = c("(Means)" = "gene.means",
"(Dispersions)" = "gene.disps",
"(ZI Means)" = "gene.ziMeans",
"(ZI Disps)" = "gene.ziDisps",
"(ZI Props)" = "gene.ziProps"),
"Cells:" = c("[Plates]" = "cell.plates",
"(Library Sizes)" = "cell.libSizes",
"[Lib Size Mod]" = "cell.libMod"),
......@@ -114,5 +91,18 @@ setMethod("show", "Lun2Params", function(object) {
"[Fold change]" = "de.fc"))
callNextMethod()
gene.params <- getParam(object, "gene.params")
zi.params <- getParam(object, "zi.params")
cat("Genes:", "\n")
cat("(Params)", "\n")
cat("data.frame with", dim(gene.params)[1], "features\n")
print(head(gene.params, n = 3))
cat(" ... ...\n")
cat("(ZI Params)", "\n")
cat("data.frame with", dim(zi.params)[1], "features\n")
print(head(zi.params, n = 3))
cat(" ... ... ...\n\n")
showPP(object, pp)
})
......@@ -71,7 +71,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
progress <- progress && verbose
checkmate::assertClass(params, "Lun2Params")
checkmate::assertInt(min.size, lower = 20, upper = length(plates))
checkmate::assertInt(min.size, lower = 1, upper = length(plates))
checkmate::assertIntegerish(plates, len = ncol(counts))
if (length(unique(plates)) < 2) {
......@@ -185,12 +185,13 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop))
params <- setParams(params, nGenes = nrow(counts),
params <- setParams(params, nGenes = length(logmeans),
cell.plates = plates, plate.var = sigma2,
gene.means = exp(logmeans),
gene.disps = dge$tagwise.dispersion,
gene.ziMeans = zinb.mean, gene.ziDisps = zinb.disp,
gene.ziProps = zinb.prop,
gene.params = data.frame(Mean = exp(logmeans),
Disp = dge$tagwise.dispersion),
zi.params = data.frame(Mean = zinb.mean,
Disp = zinb.disp,
Prop = zinb.prop),
cell.libSizes = dge$samples$lib.size)
return(params)
......
......@@ -19,6 +19,13 @@
#' Library size factors are also applied and optionally a zero-inflated
#' negative-binomial can be used.
#'
#' If the number of genes to simulate differs from the number of provied gene
#' parameters or the number of cells to simulate differs from the number of
#' library sizes the relevant paramters will be sampled with a warning. This
#' allows any number of genes or cells to be simulated regardless of the
#' number in the dataset used in the estimation step but has the downside that
#' some genes or cells may be simulated multiple times.
#'
#' @return SCESet containing simulated counts.
#'
#' @references
......@@ -52,17 +59,54 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE,
plate.var <- getParam(params, "plate.var") / getParam(params, "plate.mod")
lib.sizes <- getParam(params, "cell.libSizes")
lib.mod <- getParam(params, "cell.libMod")
# Sample lib.sizes if necessary
if (length(lib.sizes) != nCells) {
warning("Number of lib.sizes not equal to nCells. ",
"lib.sizes will be sampled.")
selected <- sample(length(lib.sizes), nCells, replace = TRUE)
libSizes <- lib.sizes[selected]
}
# Select gene parameters depending on model
if (!zinb) {
gene.means <- getParam(params, "gene.means")
gene.disps <- getParam(params, "gene.disps")
if (zinb) {
gene.params <- getParam(params, "zi.params")
} else {
gene.means <- getParam(params, "gene.ziMeans")
gene.disps <- getParam(params, "gene.ziDisps")
gene.ziProps <- getParam(params, "gene.ziProps")
gene.params <- getParam(params, "gene.params")
}
# Sample gene parameters if necessary
if (nrow(gene.params) != nGenes) {
warning("Number of gene parameters does not equal nGenes. ",
"Gene parameters will be sampled.")
selected <- sample(nrow(gene.params), nGenes, replace = TRUE)
gene.params <- gene.params[selected, ]
}
gene.means <- gene.params$Mean
gene.disps <- gene.params$Disp
if (zinb) {
gene.ziProps <- gene.params$Prop
}
de.nGenes <- getParam(params, "de.nGenes")
#if (name == "nGenes") {
# old.nGenes <- getParam(object, "nGenes")
# if (value != old.nGenes) {
# warning("nGenes has been changed. Gene parameter vectors will be ",
# "sampled to length new nGenes.")
# selected <- sample(seq_len(old.nGenes), size = value,
# replace = TRUE)
# for (parameter in grep("gene", slotNames(object), value = TRUE)) {
# old.value <- getParam(object, parameter)
# object <- setParamUnchecked(object, parameter,
# old.value[selected])
# }
# }
#}
# Set up objects to store intermediate values
cell.names <- paste0("Cell", seq_len(nCells))
gene.names <- paste0("Gene", seq_len(nGenes))
......
......@@ -17,6 +17,17 @@ The Lun2 simulation uses the following parameters:
\item{\code{nGenes}}{The number of genes to simulate.}
\item{\code{nCells}}{The number of cells to simulate.}
\item{\code{[seed]}}{Seed to use for generating random numbers.}
\item{\emph{Gene parameters}}{
\describe{
\item{\code{gene.params}}{A \code{data.frame} containing gene
parameters with two coloumns: \code{Mean} (mean expression for
each gene) and \code{Disp} (dispersion for each gene).}
\item{\code{zi.params}}{A \code{data.frame} containing
zero-inflated gene parameters with three coloumns: \code{Mean}
(mean expression for each gene), \code{Disp} (dispersion for
each, gene), and \code{Prop} (zero proportion for each gene).}
}
}
\item{\code{[nPlates]}}{The number of plates to simulate.}
\item{\emph{Plate parameters}}{
\describe{
......@@ -27,15 +38,6 @@ The Lun2 simulation uses the following parameters:
\item{\code{plate.var}}{Plate effect variance.}
}
}
\item{\emph{Gene parameters}}{
\describe{
\item{\code{gene.means}}{Mean expression for each gene.}
\item{\code{gene.disps}}{Dispersion for each gene.}
\item{\code{gene.ziMeans}}{Zero-inflated gene means.}
\item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.}
\item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.}
}
}
\item{\emph{Cell parameters}}{
\describe{
\item{\code{cell.plates}}{Factor giving the plate that each cell
......
......@@ -32,6 +32,13 @@ simulation is the addition of plate effects. Differential expression can be
added between two groups of plates (an "ingroup" and all other plates).
Library size factors are also applied and optionally a zero-inflated
negative-binomial can be used.
If the number of genes to simulate differs from the number of provied gene
parameters or the number of cells to simulate differs from the number of
library sizes the relevant paramters will be sampled with a warning. This
allows any number of genes or cells to be simulated regardless of the
number in the dataset used in the estimation step but has the downside that
some genes or cells may be simulated multiple times.
}
\examples{
sim <- lun2Simulate()
......
......@@ -8,16 +8,11 @@ test_that("nCells checks work", {
"nPlates cannot be set directly, set cell.plates instead")
})
test_that("cell sampling works", {
test_that("gene.params checks work", {
params <- newLun2Params()
expect_warning(setParam(params, "cell.plates", 1),
paste("nCells has been changed. cell.libSizes will be",
"sampled to length nCells"))
expect_error(setParam(params, "gene.params", data.frame(A = 1, B = 1)),
"gene.params: Incorrect column names")
expect_error(setParam(params, "gene.params",
data.frame(Mean = 1, Disp = "a")),
"gene.params: May only contain the following types: numeric")
})
test_that("gene sampling works", {
params <- newLun2Params()
expect_warning(setParam(params, "nGenes", 1),
paste("nGenes has been changed. Gene parameter vectors will",
"be sampled to length new nGenes."))
})
\ No newline at end of file
......@@ -4,4 +4,16 @@ test_that("Lun2 simulation output is valid", {
expect_true(validObject(lun2Simulate()))
expect_true(validObject(lun2Simulate(de.nGenes = 100)))
expect_true(validObject(lun2Simulate(zinb = TRUE)))
})
\ No newline at end of file
})
test_that("Gene sampling works", {
expect_warning(lun2Simulate(nGenes = 10),
paste("Number of gene parameters does not equal nGenes.",
"Gene parameters will be sampled."))
})
test_that("Cell sampling works", {
expect_warning(lun2Simulate(cell.plates = c(1, 1, 2)),
paste("Number of lib.sizes not equal to nCells.",
"lib.sizes will be sampled."))
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment