Skip to content
Snippets Groups Projects
Commit 5bbd4ecc authored by l.zappia's avatar l.zappia
Browse files

Merge branch 'master' into devel

* master:
  Version 0.99.13
  Run checks
  Update Lun2 documentation
  Modify Lun2Params to use data.frame

From: Luke Zappia <lazappi@users.noreply.github.com>

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/splatter@127694 bc3139a8-67e5-0310-9ffc-ced21a209358
parents 36a0a716 36ef9e12
No related branches found
No related tags found
No related merge requests found
Package: splatter Package: splatter
Type: Package Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 0.99.12 Version: 0.99.13
Date: 2017-03-22 Date: 2017-03-25
Author: Luke Zappia Author: Luke Zappia
Authors@R: Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"), c(person("Luke", "Zappia", role = c("aut", "cre"),
......
...@@ -50,11 +50,11 @@ importFrom(BiocParallel,SerialParam) ...@@ -50,11 +50,11 @@ importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bplapply) importFrom(BiocParallel,bplapply)
importFrom(checkmate,checkCharacter) importFrom(checkmate,checkCharacter)
importFrom(checkmate,checkClass) importFrom(checkmate,checkClass)
importFrom(checkmate,checkDataFrame)
importFrom(checkmate,checkFactor) importFrom(checkmate,checkFactor)
importFrom(checkmate,checkFlag) importFrom(checkmate,checkFlag)
importFrom(checkmate,checkInt) importFrom(checkmate,checkInt)
importFrom(checkmate,checkIntegerish) importFrom(checkmate,checkIntegerish)
importFrom(checkmate,checkLogical)
importFrom(checkmate,checkNumber) importFrom(checkmate,checkNumber)
importFrom(checkmate,checkNumeric) importFrom(checkmate,checkNumeric)
importFrom(ggplot2,aes_string) importFrom(ggplot2,aes_string)
......
# 0.99.13
* Modify how Lun2Params stores gene paramters to use data.frame
* Move sampling of genes/cells to lun2Simulate
* Return to old Lun2 nGenes estimate
# 0.99.12 # 0.99.12
* Add diffSCESets function * Add diffSCESets function
......
...@@ -294,6 +294,17 @@ setClass("LunParams", ...@@ -294,6 +294,17 @@ setClass("LunParams",
#' \item{\code{nGenes}}{The number of genes to simulate.} #' \item{\code{nGenes}}{The number of genes to simulate.}
#' \item{\code{nCells}}{The number of cells to simulate.} #' \item{\code{nCells}}{The number of cells to simulate.}
#' \item{\code{[seed]}}{Seed to use for generating random numbers.} #' \item{\code{[seed]}}{Seed to use for generating random numbers.}
#' \item{\emph{Gene parameters}}{
#' \describe{
#' \item{\code{gene.params}}{A \code{data.frame} containing gene
#' parameters with two coloumns: \code{Mean} (mean expression for
#' each gene) and \code{Disp} (dispersion for each gene).}
#' \item{\code{zi.params}}{A \code{data.frame} containing
#' zero-inflated gene parameters with three coloumns: \code{Mean}
#' (mean expression for each gene), \code{Disp} (dispersion for
#' each, gene), and \code{Prop} (zero proportion for each gene).}
#' }
#' }
#' \item{\code{[nPlates]}}{The number of plates to simulate.} #' \item{\code{[nPlates]}}{The number of plates to simulate.}
#' \item{\emph{Plate parameters}}{ #' \item{\emph{Plate parameters}}{
#' \describe{ #' \describe{
...@@ -304,15 +315,6 @@ setClass("LunParams", ...@@ -304,15 +315,6 @@ setClass("LunParams",
#' \item{\code{plate.var}}{Plate effect variance.} #' \item{\code{plate.var}}{Plate effect variance.}
#' } #' }
#' } #' }
#' \item{\emph{Gene parameters}}{
#' \describe{
#' \item{\code{gene.means}}{Mean expression for each gene.}
#' \item{\code{gene.disps}}{Dispersion for each gene.}
#' \item{\code{gene.ziMeans}}{Zero-inflated gene means.}
#' \item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.}
#' \item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.}
#' }
#' }
#' \item{\emph{Cell parameters}}{ #' \item{\emph{Cell parameters}}{
#' \describe{ #' \describe{
#' \item{\code{cell.plates}}{Factor giving the plate that each cell #' \item{\code{cell.plates}}{Factor giving the plate that each cell
...@@ -346,11 +348,8 @@ setClass("Lun2Params", ...@@ -346,11 +348,8 @@ setClass("Lun2Params",
plate.ingroup = "character", plate.ingroup = "character",
plate.mod = "numeric", plate.mod = "numeric",
plate.var = "numeric", plate.var = "numeric",
gene.means = "numeric", gene.params = "data.frame",
gene.disps = "numeric", zi.params = "data.frame",
gene.ziMeans = "numeric",
gene.ziDisps = "numeric",
gene.ziProps = "numeric",
cell.plates = "numeric", cell.plates = "numeric",
cell.libSizes = "numeric", cell.libSizes = "numeric",
cell.libMod = "numeric", cell.libMod = "numeric",
...@@ -361,11 +360,13 @@ setClass("Lun2Params", ...@@ -361,11 +360,13 @@ setClass("Lun2Params",
plate.ingroup = "1", plate.ingroup = "1",
plate.mod = 1, plate.mod = 1,
plate.var = 14, plate.var = 14,
gene.means = rep(3.2, 10000), gene.params = data.frame(Mean = rep(3.2, 10000),
gene.disps = rep(0.03, 10000), Disp = rep(0.03, 10000)
gene.ziMeans = rep(1.6, 10000), ),
gene.ziDisps = rep(0.1, 10000), zi.params = data.frame(Mean = rep(1.6, 10000),
gene.ziProps = rep(2.3e-6, 10000), Disp = rep(0.1, 10000),
Prop = rep(2.3e-6, 10000)
),
cell.libSizes = rep(70000, 100), cell.libSizes = rep(70000, 100),
cell.libMod = 1, cell.libMod = 1,
de.nGenes = 0, de.nGenes = 0,
......
...@@ -9,7 +9,7 @@ newLun2Params <- function(...) { ...@@ -9,7 +9,7 @@ newLun2Params <- function(...) {
return(params) return(params)
} }
#' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical #' @importFrom checkmate checkInt checkNumber checkNumeric checkDataFrame
#' checkCharacter checkFactor #' checkCharacter checkFactor
setValidity("Lun2Params", function(object) { setValidity("Lun2Params", function(object) {
...@@ -25,23 +25,30 @@ setValidity("Lun2Params", function(object) { ...@@ -25,23 +25,30 @@ setValidity("Lun2Params", function(object) {
plate.ingroup = checkCharacter(v$plate.ingroup, min.len = 1), plate.ingroup = checkCharacter(v$plate.ingroup, min.len = 1),
plate.mod = checkNumber(v$plate.mod, lower = 0), plate.mod = checkNumber(v$plate.mod, lower = 0),
plate.var = checkNumber(v$plate.var, lower = 0), plate.var = checkNumber(v$plate.var, lower = 0),
gene.means = checkNumeric(v$gene.means, lower = 0, gene.params = checkDataFrame(v$gene.params,
len = nGenes), types = "numeric",
gene.disps = checkNumeric(v$gene.disps, lower = 0, any.missing = FALSE,
len = nGenes), min.rows = 1, ncols = 2),
gene.ziMeans = checkNumeric(v$gene.ziMeans, lower = 0, zi.params = checkDataFrame(v$zi.params,
len = nGenes), types = "numeric",
gene.ziDisps = checkNumeric(v$gene.ziDisps, lower = 0, any.missing = FALSE,
len = nGenes), min.rows = 1, ncols = 3),
gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0,
len = nGenes),
cell.plates = checkFactor(v$cell.plates, len = nCells), cell.plates = checkFactor(v$cell.plates, len = nCells),
cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0, cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0,
len = nCells), min.len = 1, any.missing = FALSE,
finite = TRUE),
cell.libMod = checkNumber(v$cell.libMod, lower = 0), cell.libMod = checkNumber(v$cell.libMod, lower = 0),
de.nGene = checkInt(v$de.nGenes, lower = 0), de.nGene = checkInt(v$de.nGenes, lower = 0),
de.fc = checkNumber(v$de.fc, lower = 0)) de.fc = checkNumber(v$de.fc, lower = 0))
if (!all(colnames(v$gene.params) == c("Mean", "Disp"))) {
checks <- c(checks, gene.params = "Incorrect column names")
}
if (!all(colnames(v$zi.params) == c("Mean", "Disp", "Prop"))) {
checks <- c(checks, gene.params = "Incorrect column names")
}
if (all(checks == TRUE)) { if (all(checks == TRUE)) {
valid <- TRUE valid <- TRUE
} else { } else {
...@@ -62,36 +69,11 @@ setMethod("setParam", "Lun2Params", function(object, name, value) { ...@@ -62,36 +69,11 @@ setMethod("setParam", "Lun2Params", function(object, name, value) {
} }
if (name == "cell.plates") { if (name == "cell.plates") {
old.nCells <- getParam(object, "nCells")
object <- setParamUnchecked(object, "nCells", length(value)) object <- setParamUnchecked(object, "nCells", length(value))
object <- setParamUnchecked(object, "nPlates", length(unique(value))) object <- setParamUnchecked(object, "nPlates", length(unique(value)))
if (length(value) != old.nCells) {
warning("nCells has been changed. cell.libSizes will be sampled ",
"to length nCells")
selected <- sample(seq_len(old.nCells), length(value),
replace = TRUE)
old.libSizes <- getParam(object, "cell.libSizes")
object <- setParamUnchecked(object, "cell.libSizes",
old.libSizes[selected])
}
value <- factor(value) value <- factor(value)
} }
if (name == "nGenes") {
old.nGenes <- getParam(object, "nGenes")
if (value != old.nGenes) {
warning("nGenes has been changed. Gene parameter vectors will be ",
"sampled to length new nGenes.")
selected <- sample(seq_len(old.nGenes), size = value,
replace = TRUE)
for (parameter in grep("gene", slotNames(object), value = TRUE)) {
old.value <- getParam(object, parameter)
object <- setParamUnchecked(object, parameter,
old.value[selected])
}
}
}
object <- callNextMethod() object <- callNextMethod()
return(object) return(object)
...@@ -102,11 +84,6 @@ setMethod("show", "Lun2Params", function(object) { ...@@ -102,11 +84,6 @@ setMethod("show", "Lun2Params", function(object) {
pp <- list("Plates:" = c("[Number]" = "nPlates", pp <- list("Plates:" = c("[Number]" = "nPlates",
"[Modifier]" = "plate.mod", "[Modifier]" = "plate.mod",
"(Variance)" = "plate.var"), "(Variance)" = "plate.var"),
"Genes:" = c("(Means)" = "gene.means",
"(Dispersions)" = "gene.disps",
"(ZI Means)" = "gene.ziMeans",
"(ZI Disps)" = "gene.ziDisps",
"(ZI Props)" = "gene.ziProps"),
"Cells:" = c("[Plates]" = "cell.plates", "Cells:" = c("[Plates]" = "cell.plates",
"(Library Sizes)" = "cell.libSizes", "(Library Sizes)" = "cell.libSizes",
"[Lib Size Mod]" = "cell.libMod"), "[Lib Size Mod]" = "cell.libMod"),
...@@ -114,5 +91,18 @@ setMethod("show", "Lun2Params", function(object) { ...@@ -114,5 +91,18 @@ setMethod("show", "Lun2Params", function(object) {
"[Fold change]" = "de.fc")) "[Fold change]" = "de.fc"))
callNextMethod() callNextMethod()
gene.params <- getParam(object, "gene.params")
zi.params <- getParam(object, "zi.params")
cat("Genes:", "\n")
cat("(Params)", "\n")
cat("data.frame with", dim(gene.params)[1], "features\n")
print(head(gene.params, n = 3))
cat(" ... ...\n")
cat("(ZI Params)", "\n")
cat("data.frame with", dim(zi.params)[1], "features\n")
print(head(zi.params, n = 3))
cat(" ... ... ...\n\n")
showPP(object, pp) showPP(object, pp)
}) })
...@@ -71,7 +71,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), ...@@ -71,7 +71,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
progress <- progress && verbose progress <- progress && verbose
checkmate::assertClass(params, "Lun2Params") checkmate::assertClass(params, "Lun2Params")
checkmate::assertInt(min.size, lower = 20, upper = length(plates)) checkmate::assertInt(min.size, lower = 1, upper = length(plates))
checkmate::assertIntegerish(plates, len = ncol(counts)) checkmate::assertIntegerish(plates, len = ncol(counts))
if (length(unique(plates)) < 2) { if (length(unique(plates)) < 2) {
...@@ -185,12 +185,13 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), ...@@ -185,12 +185,13 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop)) zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop))
params <- setParams(params, nGenes = nrow(counts), params <- setParams(params, nGenes = length(logmeans),
cell.plates = plates, plate.var = sigma2, cell.plates = plates, plate.var = sigma2,
gene.means = exp(logmeans), gene.params = data.frame(Mean = exp(logmeans),
gene.disps = dge$tagwise.dispersion, Disp = dge$tagwise.dispersion),
gene.ziMeans = zinb.mean, gene.ziDisps = zinb.disp, zi.params = data.frame(Mean = zinb.mean,
gene.ziProps = zinb.prop, Disp = zinb.disp,
Prop = zinb.prop),
cell.libSizes = dge$samples$lib.size) cell.libSizes = dge$samples$lib.size)
return(params) return(params)
......
...@@ -19,6 +19,13 @@ ...@@ -19,6 +19,13 @@
#' Library size factors are also applied and optionally a zero-inflated #' Library size factors are also applied and optionally a zero-inflated
#' negative-binomial can be used. #' negative-binomial can be used.
#' #'
#' If the number of genes to simulate differs from the number of provied gene
#' parameters or the number of cells to simulate differs from the number of
#' library sizes the relevant paramters will be sampled with a warning. This
#' allows any number of genes or cells to be simulated regardless of the
#' number in the dataset used in the estimation step but has the downside that
#' some genes or cells may be simulated multiple times.
#'
#' @return SCESet containing simulated counts. #' @return SCESet containing simulated counts.
#' #'
#' @references #' @references
...@@ -52,17 +59,54 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE, ...@@ -52,17 +59,54 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE,
plate.var <- getParam(params, "plate.var") / getParam(params, "plate.mod") plate.var <- getParam(params, "plate.var") / getParam(params, "plate.mod")
lib.sizes <- getParam(params, "cell.libSizes") lib.sizes <- getParam(params, "cell.libSizes")
lib.mod <- getParam(params, "cell.libMod") lib.mod <- getParam(params, "cell.libMod")
# Sample lib.sizes if necessary
if (length(lib.sizes) != nCells) {
warning("Number of lib.sizes not equal to nCells. ",
"lib.sizes will be sampled.")
selected <- sample(length(lib.sizes), nCells, replace = TRUE)
libSizes <- lib.sizes[selected]
}
# Select gene parameters depending on model # Select gene parameters depending on model
if (!zinb) { if (zinb) {
gene.means <- getParam(params, "gene.means") gene.params <- getParam(params, "zi.params")
gene.disps <- getParam(params, "gene.disps")
} else { } else {
gene.means <- getParam(params, "gene.ziMeans") gene.params <- getParam(params, "gene.params")
gene.disps <- getParam(params, "gene.ziDisps") }
gene.ziProps <- getParam(params, "gene.ziProps")
# Sample gene parameters if necessary
if (nrow(gene.params) != nGenes) {
warning("Number of gene parameters does not equal nGenes. ",
"Gene parameters will be sampled.")
selected <- sample(nrow(gene.params), nGenes, replace = TRUE)
gene.params <- gene.params[selected, ]
} }
gene.means <- gene.params$Mean
gene.disps <- gene.params$Disp
if (zinb) {
gene.ziProps <- gene.params$Prop
}
de.nGenes <- getParam(params, "de.nGenes") de.nGenes <- getParam(params, "de.nGenes")
#if (name == "nGenes") {
# old.nGenes <- getParam(object, "nGenes")
# if (value != old.nGenes) {
# warning("nGenes has been changed. Gene parameter vectors will be ",
# "sampled to length new nGenes.")
# selected <- sample(seq_len(old.nGenes), size = value,
# replace = TRUE)
# for (parameter in grep("gene", slotNames(object), value = TRUE)) {
# old.value <- getParam(object, parameter)
# object <- setParamUnchecked(object, parameter,
# old.value[selected])
# }
# }
#}
# Set up objects to store intermediate values # Set up objects to store intermediate values
cell.names <- paste0("Cell", seq_len(nCells)) cell.names <- paste0("Cell", seq_len(nCells))
gene.names <- paste0("Gene", seq_len(nGenes)) gene.names <- paste0("Gene", seq_len(nGenes))
......
...@@ -17,6 +17,17 @@ The Lun2 simulation uses the following parameters: ...@@ -17,6 +17,17 @@ The Lun2 simulation uses the following parameters:
\item{\code{nGenes}}{The number of genes to simulate.} \item{\code{nGenes}}{The number of genes to simulate.}
\item{\code{nCells}}{The number of cells to simulate.} \item{\code{nCells}}{The number of cells to simulate.}
\item{\code{[seed]}}{Seed to use for generating random numbers.} \item{\code{[seed]}}{Seed to use for generating random numbers.}
\item{\emph{Gene parameters}}{
\describe{
\item{\code{gene.params}}{A \code{data.frame} containing gene
parameters with two coloumns: \code{Mean} (mean expression for
each gene) and \code{Disp} (dispersion for each gene).}
\item{\code{zi.params}}{A \code{data.frame} containing
zero-inflated gene parameters with three coloumns: \code{Mean}
(mean expression for each gene), \code{Disp} (dispersion for
each, gene), and \code{Prop} (zero proportion for each gene).}
}
}
\item{\code{[nPlates]}}{The number of plates to simulate.} \item{\code{[nPlates]}}{The number of plates to simulate.}
\item{\emph{Plate parameters}}{ \item{\emph{Plate parameters}}{
\describe{ \describe{
...@@ -27,15 +38,6 @@ The Lun2 simulation uses the following parameters: ...@@ -27,15 +38,6 @@ The Lun2 simulation uses the following parameters:
\item{\code{plate.var}}{Plate effect variance.} \item{\code{plate.var}}{Plate effect variance.}
} }
} }
\item{\emph{Gene parameters}}{
\describe{
\item{\code{gene.means}}{Mean expression for each gene.}
\item{\code{gene.disps}}{Dispersion for each gene.}
\item{\code{gene.ziMeans}}{Zero-inflated gene means.}
\item{\code{gene.ziDisps}}{Zero-inflated gene dispersions.}
\item{\code{gene.ziProps}}{Zero-inflated gene zero proportions.}
}
}
\item{\emph{Cell parameters}}{ \item{\emph{Cell parameters}}{
\describe{ \describe{
\item{\code{cell.plates}}{Factor giving the plate that each cell \item{\code{cell.plates}}{Factor giving the plate that each cell
......
...@@ -32,6 +32,13 @@ simulation is the addition of plate effects. Differential expression can be ...@@ -32,6 +32,13 @@ simulation is the addition of plate effects. Differential expression can be
added between two groups of plates (an "ingroup" and all other plates). added between two groups of plates (an "ingroup" and all other plates).
Library size factors are also applied and optionally a zero-inflated Library size factors are also applied and optionally a zero-inflated
negative-binomial can be used. negative-binomial can be used.
If the number of genes to simulate differs from the number of provied gene
parameters or the number of cells to simulate differs from the number of
library sizes the relevant paramters will be sampled with a warning. This
allows any number of genes or cells to be simulated regardless of the
number in the dataset used in the estimation step but has the downside that
some genes or cells may be simulated multiple times.
} }
\examples{ \examples{
sim <- lun2Simulate() sim <- lun2Simulate()
......
...@@ -8,16 +8,11 @@ test_that("nCells checks work", { ...@@ -8,16 +8,11 @@ test_that("nCells checks work", {
"nPlates cannot be set directly, set cell.plates instead") "nPlates cannot be set directly, set cell.plates instead")
}) })
test_that("cell sampling works", { test_that("gene.params checks work", {
params <- newLun2Params() params <- newLun2Params()
expect_warning(setParam(params, "cell.plates", 1), expect_error(setParam(params, "gene.params", data.frame(A = 1, B = 1)),
paste("nCells has been changed. cell.libSizes will be", "gene.params: Incorrect column names")
"sampled to length nCells")) expect_error(setParam(params, "gene.params",
data.frame(Mean = 1, Disp = "a")),
"gene.params: May only contain the following types: numeric")
}) })
test_that("gene sampling works", {
params <- newLun2Params()
expect_warning(setParam(params, "nGenes", 1),
paste("nGenes has been changed. Gene parameter vectors will",
"be sampled to length new nGenes."))
})
\ No newline at end of file
...@@ -4,4 +4,16 @@ test_that("Lun2 simulation output is valid", { ...@@ -4,4 +4,16 @@ test_that("Lun2 simulation output is valid", {
expect_true(validObject(lun2Simulate())) expect_true(validObject(lun2Simulate()))
expect_true(validObject(lun2Simulate(de.nGenes = 100))) expect_true(validObject(lun2Simulate(de.nGenes = 100)))
expect_true(validObject(lun2Simulate(zinb = TRUE))) expect_true(validObject(lun2Simulate(zinb = TRUE)))
}) })
\ No newline at end of file
test_that("Gene sampling works", {
expect_warning(lun2Simulate(nGenes = 10),
paste("Number of gene parameters does not equal nGenes.",
"Gene parameters will be sampled."))
})
test_that("Cell sampling works", {
expect_warning(lun2Simulate(cell.plates = c(1, 1, 2)),
paste("Number of lib.sizes not equal to nCells.",
"lib.sizes will be sampled."))
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment