From de7cf89093e84b2ac16fcba67f3b69538240f41d Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Thu, 26 Sep 2019 14:33:41 +1000 Subject: [PATCH] Add doublets to SplotchSimulate Might need a bit more work... --- DESCRIPTION | 4 +- NEWS.md | 4 ++ R/AllClasses.R | 8 ++++ R/SplotchParams-methods.R | 4 ++ R/splotch-simulate.R | 79 +++++++++++++++++++++++++++++++++++---- man/SplotchParams.Rd | 6 +++ 6 files changed, 95 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 908dc2d..9eaece6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.7.9014 -Date: 2019-09-19 +Version: 1.9.7.9015 +Date: 2019-09-26 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 495471a..3cb698f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +### Version 1.9.7.9015 (2019-09-26) + +* Add doublets to SplotchSimulate + ### Version 1.9.7.9014 (2019-09-24) * Add ambient expression and empty cells to SplotchSimulate diff --git a/R/AllClasses.R b/R/AllClasses.R index e603805..11c05ca 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -331,6 +331,12 @@ setClass("SplatParams", #' structure.} #' } #' } +#' \item{\emph{Doublet parameters}}{ +#' \describe{ +#' \item{\code{[doublet.prop]}}{Proportion of cells that are +#' doublets.} +#' } +#' } #' \item{\emph{Ambient parameters}}{ #' \describe{ #' \item{\code{[ambient.scale]}}{Scaling factor for the library @@ -373,6 +379,7 @@ setClass("SplotchParams", lib.dens = "density", lib.method = "character", cells.design = "data.frame", + doublet.prop = "numeric", ambient.scale = "numeric", ambient.nEmpty = "numeric"), prototype = prototype(mean.rate = 0.3, @@ -406,6 +413,7 @@ setClass("SplotchParams", Alpha = 0, Beta = 1 ), + doublet.prop = 0, ambient.scale = 0.05, ambient.nEmpty = 0)) diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index 17d6227..c3a51cc 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -57,6 +57,9 @@ setValidity("SplotchParams", function(object) { nrows = nrow( v$paths.design), ncols = 4), + doublet.prop = checkmate::check_number(v$doublet.prop, + lower = 0, + upper = 1), ambient.scale = checkmate::check_number(v$ambient.scale, lower = 0, upper = 1), @@ -159,6 +162,7 @@ setMethod("show", "SplotchParams", function(object) { "(Density)" = "lib.dens", "[Method]" = "lib.method"), "Cells:" = c("[Design]" = "cells.design"), + "Doublets:" = c("[Prop]" = "doublet.prop"), "Ambient:" = c("[Scale]" = "ambient.scale", "[Empty]" = "ambient.nEmpty")) diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index 7082165..8096a1f 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -65,10 +65,19 @@ splotchSample <- function(params, verbose = TRUE) { if (verbose) {message("Creating simulation object...")} nGenes <- getParam(params, "nGenes") - nCells <- getParam(params, "nCells") - cell.names <- paste0("Cell", seq_len(nCells)) gene.names <- paste0("Gene", seq_len(nGenes)) + nCells <- getParam(params, "nCells") + doublet.prop <- getParam(params, "doublet.prop") + nDoublets <- floor(nCells * doublet.prop) + if (doublet.prop > 0) { + nCells <- nCells - nDoublets + cell.names <- c(paste0("Cell", seq_len(nCells)), + paste0("Doublet", seq_len(nDoublets))) + } else { + cell.names <- paste0("Cell", seq_len(nCells)) + } + nEmpty <- getParam(params, "ambient.nEmpty") if (nEmpty > 0) { empty.names <- paste0("Empty", seq_len(nEmpty)) @@ -76,7 +85,8 @@ splotchSample <- function(params, verbose = TRUE) { } cells <- data.frame(Cell = cell.names, - Type = rep(c("Cell", "Empty"), c(nCells, nEmpty)), + Type = rep(c("Cell", "Doublet", "Empty"), + c(nCells, nDoublets, nEmpty)), row.names = cell.names) features <- data.frame(Gene = gene.names, BaseMean = getParam(params, "mean.values"), @@ -265,6 +275,7 @@ splotchSimLibSizes <- function(sim, params, verbose) { if (verbose) {message("Simulating library sizes...")} nCells <- getParam(params, "nCells") nEmpty <- getParam(params, "ambient.nEmpty") + is.doublet <- colData(sim)$Type == "Doublet" lib.method <- getParam(params, "lib.method") if (lib.method == "fit") { @@ -281,6 +292,7 @@ splotchSimLibSizes <- function(sim, params, verbose) { } cell.lib.sizes <- c(cell.lib.sizes, rep(0, nEmpty)) + cell.lib.sizes[is.doublet] <- 1.5 * cell.lib.sizes[is.doublet] colData(sim)$CellLibSize <- cell.lib.sizes ambient.scale <- getParam(params, "ambient.scale") @@ -299,7 +311,8 @@ splotchSimCellMeans <- function(sim, params, verbose) { cell.names <- colData(sim)$Cell gene.names <- rowData(sim)$Gene - nCells <- getParam(params, "nCells") + nDoublets <- sum(colData(sim)$Type == "Doublet") + nCells <- getParam(params, "nCells") - nDoublets cells.design <- getParam(params, "cells.design") paths.design <- getParam(params, "paths.design") paths.means <- getParam(params, "paths.means") @@ -338,10 +351,47 @@ splotchSimCellMeans <- function(sim, params, verbose) { return(means) }) + if (nDoublets > 0) { + if (verbose) {message("Assigning doublets...")} + doublet.paths1 <- sample(cells.design$Path, nDoublets, replace = TRUE, + prob = cells.design$Probability) + doublet.paths2 <- sample(cells.design$Path, nDoublets, replace = TRUE, + prob = cells.design$Probability) + + doublet.steps1 <- sapply(doublet.paths1, function(path) { + probs <- steps.probs[[path]] + step <- sample(1:length(probs), 1, prob = probs) + return(step) + }) + doublet.steps2 <- sapply(doublet.paths2, function(path) { + probs <- steps.probs[[path]] + step <- sample(1:length(probs), 1, prob = probs) + return(step) + }) + + if (verbose) {message("Simulating doublet means...")} + doublet.means1 <- sapply(seq_len(nDoublets), function(doublet) { + path <- doublet.paths1[doublet] + step <- doublet.steps1[doublet] + means <- paths.means[[path]][, step] + return(means) + }) + doublet.means2 <- sapply(seq_len(nDoublets), function(doublet) { + path <- doublet.paths2[doublet] + step <- doublet.steps2[doublet] + means <- paths.means[[path]][, step] + return(means) + }) + doublet.means <- (doublet.means1 + doublet.means2) * 0.5 + + cells.means <- cbind(cells.means, doublet.means) + } + # Adjust mean based on library size cells.props <- t(t(cells.means) / colSums(cells.means)) cells.means <- t(t(cells.props) * cell.lib.sizes[not.empty]) + if (verbose) {message("Applying BCV adjustment...")} nGenes <- getParam(params, "nGenes") bcv.common <- getParam(params, "bcv.common") bcv.df <- getParam(params, "bcv.df") @@ -355,9 +405,9 @@ splotchSimCellMeans <- function(sim, params, verbose) { } cells.means <- matrix(rgamma( - as.numeric(nGenes) * as.numeric(nCells), + as.numeric(nGenes) * as.numeric(nCells + nDoublets), shape = 1 / (bcv ^ 2), scale = cells.means * (bcv ^ 2)), - nrow = nGenes, ncol = nCells) + nrow = nGenes, ncol = nCells + nDoublets) empty.means <- matrix(0, nrow = nGenes, ncol = nEmpty) cells.means <- cbind(cells.means, empty.means) @@ -365,8 +415,21 @@ splotchSimCellMeans <- function(sim, params, verbose) { colnames(cells.means) <- cell.names rownames(cells.means) <- gene.names - colData(sim)$Path <- c(cells.paths, rep(NA, nEmpty)) - colData(sim)$Step <- c(cells.steps, rep(NA, nEmpty)) + colData(sim)$Path <- factor(c(cells.paths, rep(NA, nDoublets), + rep(NA, nEmpty))) + colData(sim)$Step <- c(cells.steps, rep(NA, nDoublets), rep(NA, nEmpty)) + + if (nDoublets > 0) { + colData(sim)$Path1 <- factor(c(rep(NA, nCells), doublet.paths1, + rep(NA, nEmpty))) + colData(sim)$Step1 <- c(rep(NA, nCells), doublet.steps1, + rep(NA, nEmpty)) + colData(sim)$Path2 <- factor(c(rep(NA, nCells), doublet.paths2, + rep(NA, nEmpty))) + colData(sim)$Step2 <- c(rep(NA, nCells), doublet.steps2, + rep(NA, nEmpty)) + } + assays(sim)$CellMeans <- cells.means return(sim) diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index 07dc3ba..7fb9c84 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -81,6 +81,12 @@ The Splotch simulation uses the following parameters: structure.} } } + \item{\emph{Doublet parameters}}{ + \describe{ + \item{\code{[doublet.prop]}}{Proportion of cells that are + doublets.} + } + } \item{\emph{Ambient parameters}}{ \describe{ \item{\code{[ambient.scale]}}{Scaling factor for the library -- GitLab