Skip to content
Snippets Groups Projects
Commit 2151840f authored by Luke Zappia's avatar Luke Zappia
Browse files

Add library size params and simulate cell means

parent 7afee23b
No related branches found
No related tags found
No related merge requests found
Package: splatter
Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 1.9.2.9002
Date: 2019-07-16
Version: 1.9.2.9003
Date: 2019-07-17
Author: Luke Zappia
Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"),
......
......@@ -134,6 +134,7 @@ importFrom(methods,slotNames)
importFrom(methods,validObject)
importFrom(stats,aggregate)
importFrom(stats,cor)
importFrom(stats,dbeta)
importFrom(stats,dnbinom)
importFrom(stats,ks.test)
importFrom(stats,median)
......
### Version 1.9.2.9003 (2019-07-17)
* Topologically sort paths
* Add library size parameters
* Simulate cell means
### Version 1.9.2.9002 (2019-07-16)
* Add paths parameters
......
......@@ -290,6 +290,22 @@ setClass("SplatParams",
#' structure.}
#' }
#' }
#' \item{\emph{Library size parameters}}{
#' \describe{
#' \item{\code{lib.loc}}{Location (meanlog) parameter for the
#' library size log-normal distribution, or mean parameter if a
#' normal distribution is used.}
#' \item{\code{lib.scale}}{Scale (sdlog) parameter for the library
#' size log-normal distribution, or sd parameter if a normal
#' distribution is used.}
#' }
#' }
#' \item{\emph{Paths parameters}}{
#' \describe{
#' \item{\code{[cells.design]}}{data.frame describing cell
#' structure.}
#' }
#' }
#' }
#'
#' The parameters not shown in brackets can be estimated from real data using
......@@ -310,7 +326,10 @@ setClass("SplotchParams",
network.regsSet = "logical",
paths.nPrograms = "numeric",
paths.design = "data.frame",
paths.means = "list"),
paths.means = "list",
lib.loc = "numeric",
lib.scale = "numeric",
cells.design = "data.frame"),
prototype = prototype(mean.rate = 0.3,
mean.shape = 0.6,
mean.values = numeric(),
......@@ -323,7 +342,15 @@ setClass("SplotchParams",
From = 0,
Steps = 100
),
paths.means = list()))
paths.means = list(),
lib.loc = 11,
lib.scale = 0.2,
cells.design = data.frame(
Path = 1,
Probability = 1,
Alpha = 0,
Beta = 1
)))
#' The LunParams class
#'
......
......@@ -33,7 +33,15 @@ setValidity("SplotchParams", function(object) {
types = "numeric",
any.missing = FALSE,
min.rows = 1,
ncols = 3))
ncols = 3),
lib.loc = checkNumber(v$lib.loc),
lib.scale = checkNumber(v$lib.scale, lower = 0),
cells.design = checkmate::checkDataFrame(v$cells.design,
types = "numeric",
any.missing = FALSE,
nrows = nrow(
v$paths.design),
ncols = 4))
if (!checkmate::testNumeric(v$mean.values, len = 0)) {
checks <- c(checks,
......@@ -73,6 +81,27 @@ setValidity("SplotchParams", function(object) {
names = "unique"))
}
if (!all(colnames(v$cells.design) == c("Path", "Probability", "Alpha",
"Beta"))) {
checks <- c(checks, cells.design = "Incorrect column names")
} else {
if (!all(sort(v$cells.design$Path) == sort(v$paths.design$Path))) {
checks <- c(checks,
cells.design = "Path names don't match paths.design")
}
if (sum(v$cells.design$Probability) != 1) {
checks <- c(checks, cells.design = "Probability must sum to 1")
}
checks <- c(checks,
"cells.design$Alpha" = checkmate::checkNumeric(
v$cells.design$Alpha,
lower = 0, any.missing = FALSE))
checks <- c(checks,
"cells.design$Beta" = checkmate::checkNumeric(
v$cells.design$Beta,
lower = 0, any.missing = FALSE))
}
if (all(checks == TRUE)) {
valid <- TRUE
} else {
......@@ -97,6 +126,10 @@ setMethod("show", "SplotchParams", function(object) {
pp.paths <- list("Paths:" = c("[nPrograms]" = "paths.nPrograms",
"[Design]" = "paths.design"))
pp.bot <- list("Library size:" = c("(Location)" = "lib.loc",
"(Scale)" = "lib.scale"),
"Cells:" = c("[Design]" = "cells.design"))
paths.means <- getParam(object, "paths.means")
if (length(paths.means) == 0) {
pp.paths[[1]] <- c(pp.paths[[1]], "[Means]*" = "paths.means")
......@@ -130,9 +163,11 @@ setMethod("show", "SplotchParams", function(object) {
if (length(paths.means) != 0) {
cat(crayon::bgYellow(crayon::bold(crayon::blue("[MEANS]\n"))))
cat(crayon::bold(crayon::green(paste(
"List of", length(paths.means), "matrices"
"List of", length(paths.means), "matrices\n\n"
))))
}
showPP(object, pp.bot)
})
#' @rdname setParam
......@@ -185,6 +220,15 @@ setMethod("setParam", "SplotchParams", function(object, name, value) {
stop("network.regsSet can not be set manually")
}
if (name == "paths.design" &&
(nrow(value) != nrow(getParam(object, "paths.design")))) {
warning("cells.design reset to match paths.design")
cells.design <- data.frame(Path = value$Path,
Probability = 1 / nrow(value),
Alpha = 0, Beta = 1)
object <- setParamUnchecked(object, "cells.design", cells.design)
}
object <- callNextMethod()
return(object)
......@@ -197,7 +241,7 @@ setMethod("setParams", "SplotchParams", function(object, update = NULL, ...) {
update <- c(update, list(...))
update <- bringItemsForward(update, c("network.graph"))
update <- bringItemsForward(update, c("network.graph", "paths.design"))
object <- callNextMethod(object, update)
......
......@@ -16,6 +16,7 @@
#'
#' @export
#' @importFrom SingleCellExperiment SingleCellExperiment
#' @importFrom stats dbeta
splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) {
checkmate::assertClass(params, "SplotchParams")
......@@ -57,6 +58,7 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) {
}
# Generate paths
if (verbose) {message("Simulating paths...")}
paths.design <- getParam(params, "paths.design")
network.graph <- getParam(params, "network.graph")
network.weights <- igraph::as_adjacency_matrix(network.graph,
......@@ -68,7 +70,15 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) {
nrow = network.nRegs, ncol = paths.nPrograms)
paths.changes <- vector("list", nrow(paths.design))
paths.factors <- vector("list", nrow(paths.design))
for (path in seq_len(nrow(paths.design))) {
paths.graph <- igraph::graph_from_data_frame(paths.design)
paths.order <- names(igraph::topo_sort(paths.graph, mode = "in"))
paths.order <- as.numeric(paths.order)
# The origin is not a path
paths.order <- paths.order[paths.order != 0]
for (path in paths.order) {
if (verbose) {message("Simulating path ", path, "...")}
nSteps <- paths.design$Steps[path]
from <- paths.design$From[path]
changes <- matrix(0, nrow = nGenes, ncol = nSteps + 1)
......@@ -104,7 +114,54 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) {
names(paths.means) <- paste0("Path", paths.design$Path)
params <- setParam(params, "paths.means", paths.means)
# Simulate base gene means
if (verbose) {message("Simulating library sizes...")}
nCells <- getParam(params, "nCells")
lib.loc <- getParam(params, "lib.loc")
lib.scale <- getParam(params, "lib.scale")
lib.sizes <- rlnorm(nCells, lib.loc, lib.scale)
if (verbose) {message("Simulating cell means...")}
nCells <- getParam(params, "nCells")
cells.design <- getParam(params, "cells.design")
cells.paths <- sample(cells.design$Path, nCells, replace = TRUE,
prob = cells.design$Probability)
paths.design <- getParam(params, "paths.design")
paths.cells.design <- merge(paths.design, cells.design)
steps.probs <- apply(paths.cells.design, 1, function(path) {
steps <- path["Steps"]
dens <- dbeta(seq(0, 1, length.out = steps),
path["Alpha"], path["Beta"])
# Adjust for infinite values at edge of distribution
dens.inf <- !is.finite(dens)
if (any(dens.inf) && all(dens[!dens.inf] == 0)) {
dens[dens.inf] <- 1
}
if (!is.finite(dens[1])) {
dens[1] <- 1.1 * dens[2]
}
if (!is.finite(dens[steps])) {
dens[steps] <- 1.1 * dens[steps - 1]
}
probs <- dens / sum(dens)
# Return a list to avoid getting a matrix if all path lengths are equal
return(list(probs))
})
# Remove unnecessary list level
steps.probs <- lapply(steps.probs, "[[", 1)
names(steps.probs) <- paths.cells.design$Path
cells.steps <- sapply(cells.paths, function(path) {
probs <- steps.probs[[path]]
step <- sample(1:length(probs), 1, prob = probs)
return(step)
})
cells.means <- sapply(seq_len(nCells), function(cell) {
path <- cells.paths[cell]
step <- cells.steps[cell]
means <- paths.means[[path]][, step]
return(means)
})
# sim <- SingleCellExperiment(assays = list(counts = counts),
# rowData = features,
......
......@@ -40,6 +40,22 @@ The Splotch simulation uses the following parameters:
structure.}
}
}
\item{\emph{Library size parameters}}{
\describe{
\item{\code{lib.loc}}{Location (meanlog) parameter for the
library size log-normal distribution, or mean parameter if a
normal distribution is used.}
\item{\code{lib.scale}}{Scale (sdlog) parameter for the library
size log-normal distribution, or sd parameter if a normal
distribution is used.}
}
}
\item{\emph{Paths parameters}}{
\describe{
\item{\code{[cells.design]}}{data.frame describing cell
structure.}
}
}
}
The parameters not shown in brackets can be estimated from real data using
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment