From 23992b03c8a7de305b28023e50cbb1d5369bf9c1 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Thu, 29 Aug 2019 16:57:09 +1000 Subject: [PATCH] Add BCV adjustment to Splotch New exponential correction method for bcv.common estimates --- DESCRIPTION | 4 +-- NEWS.md | 5 ++++ R/AllClasses.R | 12 ++++++++ R/SplotchParams-methods.R | 20 ++++++++----- R/splotch-estimate.R | 61 +++++++++++++++++++++++++++++++++++++++ R/splotch-simulate.R | 17 +++++++++++ man/SplotchParams.Rd | 8 +++++ 7 files changed, 117 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1b17759..e4b7be8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.4.9010 -Date: 2019-08-22 +Version: 1.9.4.9011 +Date: 2019-08-29 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 99438d9..c655734 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +### Version 1.9.4.9011 (2019-08-29) + +* Add BCV adjustment to Splotch simualation +* Use new exponential correction for bcv.common + ### Version 1.9.4.9010 (2019-08-22) * Add density sampling options for means and library sizes diff --git a/R/AllClasses.R b/R/AllClasses.R index f14b964..18d6b62 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -288,6 +288,14 @@ setClass("SplatParams", #' \item{\code{[mean.values]}}{Vector of means for each gene.} #' } #' } +#' \item{\emph{Biological Coefficient of Variation parameters}}{ +#' \describe{ +#' \item{\code{bcv.common}}{Underlying common dispersion across all +#' genes.} +#' \item{\code{[bcv.df]}}{Degrees of Freedom for the BCV inverse +#' chi-squared distribution.} +#' } +#' } #' \item{\emph{Network parameters}}{ #' \describe{ #' \item{\code{[network.graph]}}{Graph containing the gene network.} @@ -343,6 +351,8 @@ setClass("SplotchParams", mean.dens = "density", mean.method = "character", mean.values = "numeric", + bcv.common = "numeric", + bcv.df = "numeric", network.graph = "ANY", network.nRegs = "numeric", network.regsSet = "logical", @@ -363,6 +373,8 @@ setClass("SplotchParams", shape = 0.6)), mean.method = "fit", mean.values = numeric(), + bcv.common = 0.1, + bcv.df = 60, network.graph = NULL, network.nRegs = 100, network.regsSet = FALSE, diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index 56a1b6c..2f84c91 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -28,6 +28,8 @@ setValidity("SplotchParams", function(object) { mean.dens = checkmate::checkClass(v$mean.dens, "density"), mean.method = checkmate::checkChoice(v$mean.method, c("fit", "density")), + bcv.common = checkNumber(v$bcv.common, lower = 0), + bcv.df = checkNumber(v$bcv.df, lower = 0), network.graph = checkmate::checkClass(v$network, "igraph", null.ok = TRUE), network.nRegs = checkmate::checkInt(v$network.nRegs, @@ -129,13 +131,15 @@ setMethod("show", "SplotchParams", function(object) { "(Out Prob)" = "mean.outProb", "(Out Location)" = "mean.outLoc", "(Out Scale)" = "mean.outScale", - "(Density)" = "mean.density", + "(Density)" = "mean.dens", "[Method]" = "mean.method", - "[Values]*" = "mean.values")) + "[Values]*" = "mean.values"), + "BCV:" = c("(Common Disp)" = "bcv.common", + "[DoF]" = "bcv.df")) - pp.network <- list("Network:" = c("[Graph]" = "network.graph", - "[nRegs]" = "network.nRegs", - "[regsSet]" = "network.regsSet")) + pp.network <- list("Network:" = c("[Graph]*" = "network.graph", + "[nRegs]" = "network.nRegs", + "[regsSet]*" = "network.regsSet")) pp.paths <- list("Paths:" = c("[nPrograms]" = "paths.nPrograms", "[Design]" = "paths.design")) @@ -160,15 +164,15 @@ setMethod("show", "SplotchParams", function(object) { showPP(object, pp.network) } else { cat(crayon::bold("Network:"), "\n") - cat(crayon::bold(crayon::blue("[GRAPH]\n"))) + cat(crayon::bold(crayon::bgYellow(crayon::blue("[GRAPH]\n")))) cat(crayon::bold(crayon::green(paste( "Graph with", igraph::gorder(network.graph), "nodes and", igraph::gsize(network.graph), "edges\n" )))) show(network.graph) network.values <- list("[nRegs]" = getParam(object, "network.nRegs"), - "[regsSet]" = getParam(object, - "network.regsSet")) + "[regsSet]*" = getParam(object, + "network.regsSet")) network.default <- c(network.values$`[nRegs]` != 100, network.values$`[regsSet]` != FALSE) showValues(network.values, network.default) diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R index cae0f88..d876d4d 100644 --- a/R/splotch-estimate.R +++ b/R/splotch-estimate.R @@ -53,6 +53,7 @@ splotchEstimate.matrix <- function(counts, params = newSplotchParams(), norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ] params <- splotchEstMean(norm.counts, params, verbose) + params <- splotchEstBCV(counts, params, verbose) params <- splotchEstLib(counts, params, verbose) params <- setParams(params, nGenes = nrow(counts), nCells = ncol(counts)) @@ -101,6 +102,66 @@ splotchEstMean <- function(norm.counts, params, verbose) { return(params) } +splotchEstBCV <- function(counts, params, verbose) { + + if (verbose) {message("Estimating BCV parameters...")} + + # Add dummy design matrix to avoid print statement + design <- matrix(1, ncol(counts), 1) + disps <- edgeR::estimateDisp(counts, design = design) + raw <- disps$common.dispersion + + mean.rate <- getParam(params, "mean.rate") + mean.shape <- getParam(params, "mean.shape") + + # Caculate factors for correction based on mean parameters + # Coefficents come from fitting + # RawEst ~ + # (A1 * mean.rate + A2 * mean.shape + A3 * mean.rate * + # E_mean.shape + A4) * + # ((B1 * mean.rate + B2 * mean.shape + B3 * mean.rate * + # mean.shape + B4) ^ SimBCVCommon) + + # (C1 * mean.rate + C2 * mean.shape + C3 * mean.rate * + # mean.shape + C4) + # Using minpack.lm::nlsLM + A <- -0.6 * mean.rate - 2.9 * mean.shape + + 0.4 * mean.rate * mean.shape + 9.5 + B <- 0.15 * mean.rate + 0.25 * mean.shape - + 0.1 * mean.rate * mean.shape + 1.2 + C <- 0.9 * mean.rate + 4.5 * mean.shape - + 0.6 * mean.rate * mean.shape - 10.6 + Y <- (raw - C) / A + + message("Raw: ", raw, " A: ", A, " B: ", B, " C: ", C, " Y: ", Y) + + # Check if Y <= 0 to avoid problems when taking log + if (Y <= 0) { + Y <- 0.0001 + } + + corrected <- log(Y, base = B) + + # Dispersion cannot be negative so apply a simpler correction to those. + # Coefficients come from fitting + # SimBCVCommon ~ EstRaw + # Using lm (negative values only) + if (corrected < 0) { + warning("Exponential corrected BCV is negative.", + "Using linear correction.") + corrected <- -0.1 + 0.1 * raw + } + + if (corrected < 0) { + warning("Linear corrected BCV is negative.", + "Using existing bcv.common.") + corrected <- getParam(params, "bcv.common") + } + + params <- setParams(params, bcv.common = corrected) + + return(params) +} + #' @importFrom stats density splotchEstLib <- function(counts, params, verbose) { diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index 5632389..faa73f9 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -321,6 +321,23 @@ splotchSimCellMeans <- function(sim, params, verbose) { colnames(cells.means) <- cell.names rownames(cells.means) <- gene.names + nGenes <- getParam(params, "nGenes") + bcv.common <- getParam(params, "bcv.common") + bcv.df <- getParam(params, "bcv.df") + + if (is.finite(bcv.df)) { + bcv <- (bcv.common + (1 / sqrt(cells.means))) * + sqrt(bcv.df / rchisq(nGenes, df = bcv.df)) + } else { + warning("'bcv.df' is infinite. This parameter will be ignored.") + bcv <- (bcv.common + (1 / sqrt(cells.means))) + } + + cells.means <- matrix(rgamma( + as.numeric(nGenes) * as.numeric(nCells), + shape = 1 / (bcv ^ 2), scale = cells.means * (bcv ^ 2)), + nrow = nGenes, ncol = nCells) + colData(sim)$Path <- cells.paths colData(sim)$Step <- cells.steps assays(sim)$CellMeans <- cells.means diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index 0b03061..ec9b689 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -38,6 +38,14 @@ The Splotch simulation uses the following parameters: \item{\code{[mean.values]}}{Vector of means for each gene.} } } + \item{\emph{Biological Coefficient of Variation parameters}}{ + \describe{ + \item{\code{bcv.common}}{Underlying common dispersion across all + genes.} + \item{\code{[bcv.df]}}{Degrees of Freedom for the BCV inverse + chi-squared distribution.} + } + } \item{\emph{Network parameters}}{ \describe{ \item{\code{[network.graph]}}{Graph containing the gene network.} -- GitLab