Skip to content
Snippets Groups Projects
Commit de7cf890 authored by Luke Zappia's avatar Luke Zappia
Browse files

Add doublets to SplotchSimulate

Might need a bit more work...
parent 790a051b
No related branches found
No related tags found
No related merge requests found
Package: splatter
Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 1.9.7.9014
Date: 2019-09-19
Version: 1.9.7.9015
Date: 2019-09-26
Author: Luke Zappia
Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"),
......
### Version 1.9.7.9015 (2019-09-26)
* Add doublets to SplotchSimulate
### Version 1.9.7.9014 (2019-09-24)
* Add ambient expression and empty cells to SplotchSimulate
......
......@@ -331,6 +331,12 @@ setClass("SplatParams",
#' structure.}
#' }
#' }
#' \item{\emph{Doublet parameters}}{
#' \describe{
#' \item{\code{[doublet.prop]}}{Proportion of cells that are
#' doublets.}
#' }
#' }
#' \item{\emph{Ambient parameters}}{
#' \describe{
#' \item{\code{[ambient.scale]}}{Scaling factor for the library
......@@ -373,6 +379,7 @@ setClass("SplotchParams",
lib.dens = "density",
lib.method = "character",
cells.design = "data.frame",
doublet.prop = "numeric",
ambient.scale = "numeric",
ambient.nEmpty = "numeric"),
prototype = prototype(mean.rate = 0.3,
......@@ -406,6 +413,7 @@ setClass("SplotchParams",
Alpha = 0,
Beta = 1
),
doublet.prop = 0,
ambient.scale = 0.05,
ambient.nEmpty = 0))
......
......@@ -57,6 +57,9 @@ setValidity("SplotchParams", function(object) {
nrows = nrow(
v$paths.design),
ncols = 4),
doublet.prop = checkmate::check_number(v$doublet.prop,
lower = 0,
upper = 1),
ambient.scale = checkmate::check_number(v$ambient.scale,
lower = 0,
upper = 1),
......@@ -159,6 +162,7 @@ setMethod("show", "SplotchParams", function(object) {
"(Density)" = "lib.dens",
"[Method]" = "lib.method"),
"Cells:" = c("[Design]" = "cells.design"),
"Doublets:" = c("[Prop]" = "doublet.prop"),
"Ambient:" = c("[Scale]" = "ambient.scale",
"[Empty]" = "ambient.nEmpty"))
......
......@@ -65,10 +65,19 @@ splotchSample <- function(params, verbose = TRUE) {
if (verbose) {message("Creating simulation object...")}
nGenes <- getParam(params, "nGenes")
nCells <- getParam(params, "nCells")
cell.names <- paste0("Cell", seq_len(nCells))
gene.names <- paste0("Gene", seq_len(nGenes))
nCells <- getParam(params, "nCells")
doublet.prop <- getParam(params, "doublet.prop")
nDoublets <- floor(nCells * doublet.prop)
if (doublet.prop > 0) {
nCells <- nCells - nDoublets
cell.names <- c(paste0("Cell", seq_len(nCells)),
paste0("Doublet", seq_len(nDoublets)))
} else {
cell.names <- paste0("Cell", seq_len(nCells))
}
nEmpty <- getParam(params, "ambient.nEmpty")
if (nEmpty > 0) {
empty.names <- paste0("Empty", seq_len(nEmpty))
......@@ -76,7 +85,8 @@ splotchSample <- function(params, verbose = TRUE) {
}
cells <- data.frame(Cell = cell.names,
Type = rep(c("Cell", "Empty"), c(nCells, nEmpty)),
Type = rep(c("Cell", "Doublet", "Empty"),
c(nCells, nDoublets, nEmpty)),
row.names = cell.names)
features <- data.frame(Gene = gene.names,
BaseMean = getParam(params, "mean.values"),
......@@ -265,6 +275,7 @@ splotchSimLibSizes <- function(sim, params, verbose) {
if (verbose) {message("Simulating library sizes...")}
nCells <- getParam(params, "nCells")
nEmpty <- getParam(params, "ambient.nEmpty")
is.doublet <- colData(sim)$Type == "Doublet"
lib.method <- getParam(params, "lib.method")
if (lib.method == "fit") {
......@@ -281,6 +292,7 @@ splotchSimLibSizes <- function(sim, params, verbose) {
}
cell.lib.sizes <- c(cell.lib.sizes, rep(0, nEmpty))
cell.lib.sizes[is.doublet] <- 1.5 * cell.lib.sizes[is.doublet]
colData(sim)$CellLibSize <- cell.lib.sizes
ambient.scale <- getParam(params, "ambient.scale")
......@@ -299,7 +311,8 @@ splotchSimCellMeans <- function(sim, params, verbose) {
cell.names <- colData(sim)$Cell
gene.names <- rowData(sim)$Gene
nCells <- getParam(params, "nCells")
nDoublets <- sum(colData(sim)$Type == "Doublet")
nCells <- getParam(params, "nCells") - nDoublets
cells.design <- getParam(params, "cells.design")
paths.design <- getParam(params, "paths.design")
paths.means <- getParam(params, "paths.means")
......@@ -338,10 +351,47 @@ splotchSimCellMeans <- function(sim, params, verbose) {
return(means)
})
if (nDoublets > 0) {
if (verbose) {message("Assigning doublets...")}
doublet.paths1 <- sample(cells.design$Path, nDoublets, replace = TRUE,
prob = cells.design$Probability)
doublet.paths2 <- sample(cells.design$Path, nDoublets, replace = TRUE,
prob = cells.design$Probability)
doublet.steps1 <- sapply(doublet.paths1, function(path) {
probs <- steps.probs[[path]]
step <- sample(1:length(probs), 1, prob = probs)
return(step)
})
doublet.steps2 <- sapply(doublet.paths2, function(path) {
probs <- steps.probs[[path]]
step <- sample(1:length(probs), 1, prob = probs)
return(step)
})
if (verbose) {message("Simulating doublet means...")}
doublet.means1 <- sapply(seq_len(nDoublets), function(doublet) {
path <- doublet.paths1[doublet]
step <- doublet.steps1[doublet]
means <- paths.means[[path]][, step]
return(means)
})
doublet.means2 <- sapply(seq_len(nDoublets), function(doublet) {
path <- doublet.paths2[doublet]
step <- doublet.steps2[doublet]
means <- paths.means[[path]][, step]
return(means)
})
doublet.means <- (doublet.means1 + doublet.means2) * 0.5
cells.means <- cbind(cells.means, doublet.means)
}
# Adjust mean based on library size
cells.props <- t(t(cells.means) / colSums(cells.means))
cells.means <- t(t(cells.props) * cell.lib.sizes[not.empty])
if (verbose) {message("Applying BCV adjustment...")}
nGenes <- getParam(params, "nGenes")
bcv.common <- getParam(params, "bcv.common")
bcv.df <- getParam(params, "bcv.df")
......@@ -355,9 +405,9 @@ splotchSimCellMeans <- function(sim, params, verbose) {
}
cells.means <- matrix(rgamma(
as.numeric(nGenes) * as.numeric(nCells),
as.numeric(nGenes) * as.numeric(nCells + nDoublets),
shape = 1 / (bcv ^ 2), scale = cells.means * (bcv ^ 2)),
nrow = nGenes, ncol = nCells)
nrow = nGenes, ncol = nCells + nDoublets)
empty.means <- matrix(0, nrow = nGenes, ncol = nEmpty)
cells.means <- cbind(cells.means, empty.means)
......@@ -365,8 +415,21 @@ splotchSimCellMeans <- function(sim, params, verbose) {
colnames(cells.means) <- cell.names
rownames(cells.means) <- gene.names
colData(sim)$Path <- c(cells.paths, rep(NA, nEmpty))
colData(sim)$Step <- c(cells.steps, rep(NA, nEmpty))
colData(sim)$Path <- factor(c(cells.paths, rep(NA, nDoublets),
rep(NA, nEmpty)))
colData(sim)$Step <- c(cells.steps, rep(NA, nDoublets), rep(NA, nEmpty))
if (nDoublets > 0) {
colData(sim)$Path1 <- factor(c(rep(NA, nCells), doublet.paths1,
rep(NA, nEmpty)))
colData(sim)$Step1 <- c(rep(NA, nCells), doublet.steps1,
rep(NA, nEmpty))
colData(sim)$Path2 <- factor(c(rep(NA, nCells), doublet.paths2,
rep(NA, nEmpty)))
colData(sim)$Step2 <- c(rep(NA, nCells), doublet.steps2,
rep(NA, nEmpty))
}
assays(sim)$CellMeans <- cells.means
return(sim)
......
......@@ -81,6 +81,12 @@ The Splotch simulation uses the following parameters:
structure.}
}
}
\item{\emph{Doublet parameters}}{
\describe{
\item{\code{[doublet.prop]}}{Proportion of cells that are
doublets.}
}
}
\item{\emph{Ambient parameters}}{
\describe{
\item{\code{[ambient.scale]}}{Scaling factor for the library
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment