From 7df8db079e8d2b602be4508c84b6b1229e929ef8 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Tue, 13 Aug 2019 16:35:38 +1000 Subject: [PATCH] Add splotchEstimate function --- DESCRIPTION | 4 +- NAMESPACE | 3 + NEWS.md | 6 +- R/splotch-estimate.R | 174 +++++++++++++++++++++++++++++++++++++++++ man/splotchEstimate.Rd | 43 ++++++++++ 5 files changed, 227 insertions(+), 3 deletions(-) create mode 100644 R/splotch-estimate.R create mode 100644 man/splotchEstimate.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 25f5b91..b2a14bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.3.9005 -Date: 2019-08-08 +Version: 1.9.3.9006 +Date: 2019-08-13 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 769ee81..a2a713a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,8 @@ S3method(sparseDCEstimate,SingleCellExperiment) S3method(sparseDCEstimate,matrix) S3method(splatEstimate,SingleCellExperiment) S3method(splatEstimate,matrix) +S3method(splotchEstimate,SingleCellExperiment) +S3method(splotchEstimate,matrix) S3method(zinbEstimate,SingleCellExperiment) S3method(zinbEstimate,matrix) export(BASiCSEstimate) @@ -64,6 +66,7 @@ export(splatSimulate) export(splatSimulateGroups) export(splatSimulatePaths) export(splatSimulateSingle) +export(splotchEstimate) export(splotchSimulate) export(summariseDiff) export(zinbEstimate) diff --git a/NEWS.md b/NEWS.md index afaeb75..eec7a01 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -### Version 1.9.3.9005(2019-08-08) +### Version 1.9.3.9006 (2019-08-13) + +* Add splotchEstimate function + +### Version 1.9.3.9005 (2019-08-08) * Simulate counts * Split into separate functions for each stage diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R new file mode 100644 index 0000000..93f7996 --- /dev/null +++ b/R/splotch-estimate.R @@ -0,0 +1,174 @@ +#' Estimate Splotch simulation parameters +#' +#' Estimate simulation parameters for the Splotch simulation from a real +#' dataset. See the individual estimation functions for more details on how this +#' is done. +#' +#' @param counts either a counts matrix or a SingleCellExperiment object +#' containing count data to estimate parameters from. +#' @param params SplotchParams object to store estimated values in. +#' @param verbose logical. Whether to print progress messages. +#' +#' @seealso +#' \code{\link{splotchEstMean}}, \code{\link{splotchEstLib}} +#' +#' @return SplotchParams object containing the estimated parameters. +#' +#' @examples +#' # Load example data +#' library(scater) +#' data("sc_example_counts") +#' +#' params <- splotchEstimate(sc_example_counts) +#' params +#' @export +splotchEstimate <- function(counts, params = newSplotchParams(), + verbose = TRUE) { + UseMethod("splotchEstimate") +} + +#' @rdname splotchEstimate +#' @export +splotchEstimate.SingleCellExperiment <- function(counts, + params = newSplotchParams(), + verbose = TRUE) { + counts <- BiocGenerics::counts(counts) + splotchEstimate(counts, params, verbose) +} + +#' @rdname splotchEstimate +#' @importFrom stats median +#' @export +splotchEstimate.matrix <- function(counts, params = newSplotchParams(), + verbose = TRUE) { + + checkmate::assertClass(params, "SplotchParams") + checkmate::assertFlag(verbose) + + # Normalise for library size and remove all zero genes + lib.sizes <- colSums(counts) + lib.med <- median(lib.sizes) + norm.counts <- t(t(counts) / lib.sizes * lib.med) + norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ] + + params <- splotchEstMean(norm.counts, params, verbose) + params <- splotchEstLib(counts, params, verbose) + + params <- setParams(params, nGenes = nrow(counts), nCells = ncol(counts)) + + return(params) +} + +splotchEstMean <- function(norm.counts, params, verbose) { + + if (verbose) {message("Estimating mean parameters...")} + + means <- rowMeans(norm.counts) + means <- means[means != 0] + non.zero <- rowSums(norm.counts > 0) + + fit <- selectFit(means, "gamma", non.zero, verbose) + + params <- setParams(params, mean.shape = unname(fit$estimate["shape"]), + mean.rate = unname(fit$estimate["rate"])) + + return(params) +} + +splotchEstLib <- function(counts, params, verbose) { + + if (verbose) {message("Estimating library size parameters...")} + + lib.sizes <- colSums(counts) + + fit <- selectFit(lib.sizes, "lnorm", verbose = verbose) + + lib.loc <- unname(fit$estimate["meanlog"]) + lib.scale <- unname(fit$estimate["sdlog"]) + + params <- setParams(params, lib.loc = lib.loc, lib.scale = lib.scale) + + return(params) +} + +selectFit <- function(data, distr, weights = NULL, verbose = TRUE) { + + checkmate::assertNumeric(data, finite = TRUE, any.missing = FALSE) + checkmate::assertString(distr) + checkmate::assertNumeric(weights, finite = TRUE, any.missing = TRUE, + len = length(data), null.ok = TRUE) + checkmate::assertFlag(verbose) + + # Sink output that sometimes happens when fitting + sink(tempfile()) + on.exit(sink()) + + fits <- list() + + try( + fits$`MLE` <- fitdistrplus::fitdist(data, distr, method = "mle"), + silent = TRUE + ) + + try( + fits$`MME` <- fitdistrplus::fitdist(data, distr, method = "mme"), + silent = TRUE + ) + + try( + fits$`QME` <- fitdistrplus::fitdist(data, distr, method = "qme", + probs = c(1/3, 2/3)), + silent = TRUE + ) + + try( + fits$`MGE (CvM)` <- fitdistrplus::fitdist(data, distr, method = "mge", + gof = "CvM"), + silent = TRUE + ) + + try( + fits$`MGE (KS)` <- fitdistrplus::fitdist(data, distr, method = "mge", + gof = "KS"), + silent = TRUE + ) + + try( + fits$`MGE (AD)` <- fitdistrplus::fitdist(data, distr, method = "mge", + gof = "AD"), + silent = TRUE + ) + + if (!is.null(weights)) { + try(suppressWarnings( + fits$`Weighted MLE` <- fitdistrplus::fitdist(data, distr, + method = "mle", + weights = weights) + ), silent = TRUE + ) + + try(suppressWarnings( + fits$`Weighted MME` <- fitdistrplus::fitdist(data, distr, + method = "mme", + weights = weights) + ), silent = TRUE + ) + + try(suppressWarnings( + fits$`Weighted QME` <- fitdistrplus::fitdist(data, distr, + method = "qme", + probs = c(1/3, 2/3), + weights = weights) + ), silent = TRUE + ) + } + + aics <- fitdistrplus::gofstat(fits)$aic + selected <- which(aics == min(aics, na.rm = TRUE)) + + if (verbose) { + message("Selected ", names(fits)[selected], " fit using AIC") + } + + return(fits[[selected]]) +} diff --git a/man/splotchEstimate.Rd b/man/splotchEstimate.Rd new file mode 100644 index 0000000..e35772f --- /dev/null +++ b/man/splotchEstimate.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-estimate.R +\name{splotchEstimate} +\alias{splotchEstimate} +\alias{splotchEstimate.SingleCellExperiment} +\alias{splotchEstimate.matrix} +\title{Estimate Splotch simulation parameters} +\usage{ +splotchEstimate(counts, params = newSplotchParams(), verbose = TRUE) + +\method{splotchEstimate}{SingleCellExperiment}(counts, + params = newSplotchParams(), verbose = TRUE) + +\method{splotchEstimate}{matrix}(counts, params = newSplotchParams(), + verbose = TRUE) +} +\arguments{ +\item{counts}{either a counts matrix or a SingleCellExperiment object +containing count data to estimate parameters from.} + +\item{params}{SplotchParams object to store estimated values in.} + +\item{verbose}{logical. Whether to print progress messages.} +} +\value{ +SplotchParams object containing the estimated parameters. +} +\description{ +Estimate simulation parameters for the Splotch simulation from a real +dataset. See the individual estimation functions for more details on how this +is done. +} +\examples{ +# Load example data +library(scater) +data("sc_example_counts") + +params <- splotchEstimate(sc_example_counts) +params +} +\seealso{ +\code{\link{splotchEstMean}}, \code{\link{splotchEstLib}} +} -- GitLab