From 790a051bb7c14af44b70f92688dc5f9b21505a40 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Tue, 24 Sep 2019 12:16:13 +1000 Subject: [PATCH] Add ambient expression and empty cells to Splotch --- DESCRIPTION | 5 +- NAMESPACE | 6 -- NEWS.md | 4 ++ R/AllClasses.R | 19 +++++- R/SplotchParams-methods.R | 16 ++++- R/splotch-simulate.R | 123 +++++++++++++++++++++++++++++++------- man/SplotchParams.Rd | 11 +++- 7 files changed, 150 insertions(+), 34 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0ceb4d8..908dc2d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.7.9012 +Version: 1.9.7.9014 Date: 2019-09-19 Author: Luke Zappia Authors@R: @@ -63,7 +63,8 @@ Suggests: SparseDC, BiocManager, spelling, - igraph + igraph, + DropletUtils biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing, Software, ImmunoOncology URL: https://github.com/Oshlack/splatter diff --git a/NAMESPACE b/NAMESPACE index d0fd874..08a786f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -66,11 +66,8 @@ export(splatSimulate) export(splatSimulateGroups) export(splatSimulatePaths) export(splatSimulateSingle) -<<<<<<< HEAD export(splotchEstimate) export(splotchSimulate) -======= ->>>>>>> master export(summariseDiff) export(zinbEstimate) export(zinbSimulate) @@ -142,11 +139,8 @@ importFrom(methods,validObject) importFrom(stats,aggregate) importFrom(stats,approxfun) importFrom(stats,cor) -<<<<<<< HEAD importFrom(stats,dbeta) importFrom(stats,density) -======= ->>>>>>> master importFrom(stats,dnbinom) importFrom(stats,ks.test) importFrom(stats,median) diff --git a/NEWS.md b/NEWS.md index 48494f7..495471a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +### Version 1.9.7.9014 (2019-09-24) + +* Add ambient expression and empty cells to SplotchSimulate + ### Version 1.9.7.9013 (2019-09-19) * Merge master into splotch branch diff --git a/R/AllClasses.R b/R/AllClasses.R index 18d6b62..e603805 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -325,12 +325,21 @@ setClass("SplatParams", #' "density" to sample from the provided density object.} #' } #' } -#' \item{\emph{Paths parameters}}{ +#' \item{\emph{Design parameters}}{ #' \describe{ #' \item{\code{[cells.design]}}{data.frame describing cell #' structure.} #' } #' } +#' \item{\emph{Ambient parameters}}{ +#' \describe{ +#' \item{\code{[ambient.scale]}}{Scaling factor for the library +#' size log-normal distribution when generating ambient library +#' sizes.} +#' \item{\code{[ambient.nEmpty]}}{Number of empty cells to +#' simulate.} +#' } +#' } #' } #' #' The parameters not shown in brackets can be estimated from real data using @@ -363,7 +372,9 @@ setClass("SplotchParams", lib.scale = "numeric", lib.dens = "density", lib.method = "character", - cells.design = "data.frame"), + cells.design = "data.frame", + ambient.scale = "numeric", + ambient.nEmpty = "numeric"), prototype = prototype(mean.rate = 0.3, mean.shape = 0.6, mean.outProb = 0.05, @@ -394,7 +405,9 @@ setClass("SplotchParams", Probability = 1, Alpha = 0, Beta = 1 - ))) + ), + ambient.scale = 0.05, + ambient.nEmpty = 0)) #' The LunParams class #' diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index 2f84c91..17d6227 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -7,6 +7,10 @@ newSplotchParams <- function(...) { stop("The Splotch simulation requires the 'igraph' package.") } + if (!requireNamespace("DropletUtils", quietly = TRUE)) { + stop("The Splotch simulation requires the 'DropletUtils' package.") + } + params <- new("SplotchParams") params <- setParams(params, ...) @@ -52,7 +56,13 @@ setValidity("SplotchParams", function(object) { any.missing = FALSE, nrows = nrow( v$paths.design), - ncols = 4)) + ncols = 4), + ambient.scale = checkmate::check_number(v$ambient.scale, + lower = 0, + upper = 1), + ambient.nEmpty = checkmate::check_number(v$ambient.nEmpty, + lower = 0, + finite = TRUE)) if (!checkmate::testNumeric(v$mean.values, len = 0)) { checks <- c(checks, @@ -148,7 +158,9 @@ setMethod("show", "SplotchParams", function(object) { "(Scale)" = "lib.scale", "(Density)" = "lib.dens", "[Method]" = "lib.method"), - "Cells:" = c("[Design]" = "cells.design")) + "Cells:" = c("[Design]" = "cells.design"), + "Ambient:" = c("[Scale]" = "ambient.scale", + "[Empty]" = "ambient.nEmpty")) paths.means <- getParam(object, "paths.means") if (length(paths.means) == 0) { diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index faa73f9..7082165 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -69,13 +69,25 @@ splotchSample <- function(params, verbose = TRUE) { cell.names <- paste0("Cell", seq_len(nCells)) gene.names <- paste0("Gene", seq_len(nGenes)) - cells <- data.frame(Cell = cell.names, row.names = cell.names) - features <- data.frame(Gene = gene.names, row.names = gene.names) + nEmpty <- getParam(params, "ambient.nEmpty") + if (nEmpty > 0) { + empty.names <- paste0("Empty", seq_len(nEmpty)) + cell.names <- c(cell.names, empty.names) + } + + cells <- data.frame(Cell = cell.names, + Type = rep(c("Cell", "Empty"), c(nCells, nEmpty)), + row.names = cell.names) + features <- data.frame(Gene = gene.names, + BaseMean = getParam(params, "mean.values"), + row.names = gene.names) sim <- SingleCellExperiment(rowData = features, colData = cells, metadata = list(Params = params)) sim <- splotchSimLibSizes(sim, params, verbose) sim <- splotchSimCellMeans(sim, params, verbose) + sim <- splotchSimCellCounts(sim, params, verbose) + sim <- splotchSimAmbientCounts(sim, params, verbose) sim <- splotchSimCounts(sim, params, verbose) return(sim) @@ -252,6 +264,7 @@ splotchSimLibSizes <- function(sim, params, verbose) { if (verbose) {message("Simulating library sizes...")} nCells <- getParam(params, "nCells") + nEmpty <- getParam(params, "ambient.nEmpty") lib.method <- getParam(params, "lib.method") if (lib.method == "fit") { @@ -259,15 +272,24 @@ splotchSimLibSizes <- function(sim, params, verbose) { lib.loc <- getParam(params, "lib.loc") lib.scale <- getParam(params, "lib.scale") - exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) + cell.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) } else if (lib.method == "density") { if (verbose) {message("Sampling from density object...")} lib.dens <- getParam(params, "lib.dens") - exp.lib.sizes <- sampleDensity(nCells, lib.dens) + cell.lib.sizes <- sampleDensity(nCells, lib.dens) } - colData(sim)$ExpLibSize <- exp.lib.sizes + cell.lib.sizes <- c(cell.lib.sizes, rep(0, nEmpty)) + colData(sim)$CellLibSize <- cell.lib.sizes + + ambient.scale <- getParam(params, "ambient.scale") + if (ambient.scale > 0) { + ambient.loc <- log(exp(lib.loc) * ambient.scale) + + ambient.lib.sizes <- rlnorm(nCells + nEmpty, ambient.loc, 0.3) + colData(sim)$AmbientLibSize <- ambient.lib.sizes + } return(sim) } @@ -281,7 +303,9 @@ splotchSimCellMeans <- function(sim, params, verbose) { cells.design <- getParam(params, "cells.design") paths.design <- getParam(params, "paths.design") paths.means <- getParam(params, "paths.means") - exp.lib.sizes <- colData(sim)$ExpLibSize + cell.lib.sizes <- colData(sim)$CellLibSize + nEmpty <- getParam(params, "ambient.nEmpty") + not.empty <- colData(sim)$Type != "Empty" if (verbose) {message("Assigning cells to paths...")} cells.paths <- sample(cells.design$Path, nCells, replace = TRUE, @@ -316,10 +340,7 @@ splotchSimCellMeans <- function(sim, params, verbose) { # Adjust mean based on library size cells.props <- t(t(cells.means) / colSums(cells.means)) - cells.means <- t(t(cells.props) * exp.lib.sizes) - - colnames(cells.means) <- cell.names - rownames(cells.means) <- gene.names + cells.means <- t(t(cells.props) * cell.lib.sizes[not.empty]) nGenes <- getParam(params, "nGenes") bcv.common <- getParam(params, "bcv.common") @@ -338,30 +359,92 @@ splotchSimCellMeans <- function(sim, params, verbose) { shape = 1 / (bcv ^ 2), scale = cells.means * (bcv ^ 2)), nrow = nGenes, ncol = nCells) - colData(sim)$Path <- cells.paths - colData(sim)$Step <- cells.steps + empty.means <- matrix(0, nrow = nGenes, ncol = nEmpty) + cells.means <- cbind(cells.means, empty.means) + + colnames(cells.means) <- cell.names + rownames(cells.means) <- gene.names + + colData(sim)$Path <- c(cells.paths, rep(NA, nEmpty)) + colData(sim)$Step <- c(cells.steps, rep(NA, nEmpty)) assays(sim)$CellMeans <- cells.means return(sim) } -splotchSimCounts <- function(sim, params, verbose) { +splotchSimCellCounts <- function(sim, params, verbose) { - if (verbose) {message("Simulating counts...")} + if (verbose) {message("Simulating cell counts...")} cell.names <- colData(sim)$Cell gene.names <- rowData(sim)$Gene nGenes <- getParam(params, "nGenes") nCells <- getParam(params, "nCells") + nEmpty <- getParam(params, "ambient.nEmpty") cells.means <- assays(sim)$CellMeans - true.counts <- matrix(rpois( - as.numeric(nGenes) * as.numeric(nCells), + cell.counts <- matrix(rpois( + as.numeric(nGenes) * as.numeric(nCells + nEmpty), lambda = cells.means), - nrow = nGenes, ncol = nCells) + nrow = nGenes, ncol = nCells + nEmpty) + + colnames(cell.counts) <- cell.names + rownames(cell.counts) <- gene.names + assays(sim)$CellCounts <- cell.counts + + return(sim) +} + +splotchSimAmbientCounts <- function(sim, params, verbose) { + + if (verbose) {message("Simulating ambient counts...")} + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + nGenes <- getParam(params, "nGenes") + nCells <- getParam(params, "nCells") + nEmpty <- getParam(params, "ambient.nEmpty") + cell.counts <- assays(sim)$CellCounts + not.empty <- colData(sim)$Type != "Empty" + ambient.lib.sizes <- colData(sim)$AmbientLibSize + + not.empty.means <- rowMeans(cell.counts[, not.empty]) + ambient.props <- not.empty.means / sum(not.empty.means) + + ambient.means <- ambient.props %*% t(ambient.lib.sizes) + + ambient.counts <- matrix(rpois( + as.numeric(nGenes) * as.numeric(nCells + nEmpty), + lambda = ambient.means), + nrow = nGenes, ncol = nCells + nEmpty) + + colnames(ambient.counts) <- cell.names + rownames(ambient.counts) <- gene.names + assays(sim)$AmbientCounts <- ambient.counts + rowData(sim)$AmbientMean <- not.empty.means + + return(sim) +} + +splotchSimCounts <- function(sim, params, verbose) { + + if (verbose) {message("Simulating final counts...")} + cell.lib.sizes <- colData(sim)$CellLibSize + ambient.lib.sizes <- colData(sim)$AmbientLibSize + empty <- colData(sim)$Type == "Empty" + cell.counts <- assays(sim)$CellCounts + ambient.counts <- assays(sim)$AmbientCounts + + lib.sizes <- cell.lib.sizes + lib.sizes[empty] <- ambient.lib.sizes[empty] + + counts <- cell.counts + ambient.counts + + down.prop <- lib.sizes / colSums(counts) + # Avoid proportion creeping over 1 for empty cells + down.prop <- min(down.prop, 1) + + counts <- DropletUtils::downsampleMatrix(counts, down.prop) - colnames(true.counts) <- cell.names - rownames(true.counts) <- gene.names - assays(sim)$counts <- true.counts + assays(sim)$counts <- counts return(sim) } diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index ec9b689..07dc3ba 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -75,12 +75,21 @@ The Splotch simulation uses the following parameters: "density" to sample from the provided density object.} } } - \item{\emph{Paths parameters}}{ + \item{\emph{Design parameters}}{ \describe{ \item{\code{[cells.design]}}{data.frame describing cell structure.} } } + \item{\emph{Ambient parameters}}{ + \describe{ + \item{\code{[ambient.scale]}}{Scaling factor for the library + size log-normal distribution when generating ambient library + sizes.} + \item{\code{[ambient.nEmpty]}}{Number of empty cells to + simulate.} + } + } } The parameters not shown in brackets can be estimated from real data using -- GitLab