From 7df8db079e8d2b602be4508c84b6b1229e929ef8 Mon Sep 17 00:00:00 2001
From: Luke Zappia <lazappi@users.noreply.github.com>
Date: Tue, 13 Aug 2019 16:35:38 +1000
Subject: [PATCH] Add splotchEstimate function

---
 DESCRIPTION            |   4 +-
 NAMESPACE              |   3 +
 NEWS.md                |   6 +-
 R/splotch-estimate.R   | 174 +++++++++++++++++++++++++++++++++++++++++
 man/splotchEstimate.Rd |  43 ++++++++++
 5 files changed, 227 insertions(+), 3 deletions(-)
 create mode 100644 R/splotch-estimate.R
 create mode 100644 man/splotchEstimate.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 25f5b91..b2a14bf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 1.9.3.9005
-Date: 2019-08-08
+Version: 1.9.3.9006
+Date: 2019-08-13
 Author: Luke Zappia
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
diff --git a/NAMESPACE b/NAMESPACE
index 769ee81..a2a713a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,6 +19,8 @@ S3method(sparseDCEstimate,SingleCellExperiment)
 S3method(sparseDCEstimate,matrix)
 S3method(splatEstimate,SingleCellExperiment)
 S3method(splatEstimate,matrix)
+S3method(splotchEstimate,SingleCellExperiment)
+S3method(splotchEstimate,matrix)
 S3method(zinbEstimate,SingleCellExperiment)
 S3method(zinbEstimate,matrix)
 export(BASiCSEstimate)
@@ -64,6 +66,7 @@ export(splatSimulate)
 export(splatSimulateGroups)
 export(splatSimulatePaths)
 export(splatSimulateSingle)
+export(splotchEstimate)
 export(splotchSimulate)
 export(summariseDiff)
 export(zinbEstimate)
diff --git a/NEWS.md b/NEWS.md
index afaeb75..eec7a01 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,8 @@
-### Version 1.9.3.9005(2019-08-08)
+### Version 1.9.3.9006 (2019-08-13)
+
+* Add splotchEstimate function
+
+### Version 1.9.3.9005 (2019-08-08)
 
 * Simulate counts
 * Split into separate functions for each stage
diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R
new file mode 100644
index 0000000..93f7996
--- /dev/null
+++ b/R/splotch-estimate.R
@@ -0,0 +1,174 @@
+#' Estimate Splotch simulation parameters
+#'
+#' Estimate simulation parameters for the Splotch simulation from a real
+#' dataset. See the individual estimation functions for more details on how this
+#' is done.
+#'
+#' @param counts either a counts matrix or a SingleCellExperiment object
+#'        containing count data to estimate parameters from.
+#' @param params SplotchParams object to store estimated values in.
+#' @param verbose logical. Whether to print progress messages.
+#'
+#' @seealso
+#' \code{\link{splotchEstMean}}, \code{\link{splotchEstLib}}
+#'
+#' @return SplotchParams object containing the estimated parameters.
+#'
+#' @examples
+#' # Load example data
+#' library(scater)
+#' data("sc_example_counts")
+#'
+#' params <- splotchEstimate(sc_example_counts)
+#' params
+#' @export
+splotchEstimate <- function(counts, params = newSplotchParams(),
+                            verbose = TRUE) {
+    UseMethod("splotchEstimate")
+}
+
+#' @rdname splotchEstimate
+#' @export
+splotchEstimate.SingleCellExperiment <- function(counts,
+                                                 params = newSplotchParams(),
+                                                 verbose = TRUE) {
+    counts <- BiocGenerics::counts(counts)
+    splotchEstimate(counts, params, verbose)
+}
+
+#' @rdname splotchEstimate
+#' @importFrom stats median
+#' @export
+splotchEstimate.matrix <- function(counts, params = newSplotchParams(),
+                                   verbose = TRUE) {
+
+    checkmate::assertClass(params, "SplotchParams")
+    checkmate::assertFlag(verbose)
+
+    # Normalise for library size and remove all zero genes
+    lib.sizes <- colSums(counts)
+    lib.med <- median(lib.sizes)
+    norm.counts <- t(t(counts) / lib.sizes * lib.med)
+    norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ]
+
+    params <- splotchEstMean(norm.counts, params, verbose)
+    params <- splotchEstLib(counts, params, verbose)
+
+    params <- setParams(params, nGenes = nrow(counts), nCells = ncol(counts))
+
+    return(params)
+}
+
+splotchEstMean <- function(norm.counts, params, verbose) {
+
+    if (verbose) {message("Estimating mean parameters...")}
+
+    means <- rowMeans(norm.counts)
+    means <- means[means != 0]
+    non.zero <- rowSums(norm.counts > 0)
+
+    fit <- selectFit(means, "gamma", non.zero, verbose)
+
+    params <- setParams(params, mean.shape = unname(fit$estimate["shape"]),
+                        mean.rate = unname(fit$estimate["rate"]))
+
+    return(params)
+}
+
+splotchEstLib <- function(counts, params, verbose) {
+
+    if (verbose) {message("Estimating library size parameters...")}
+
+    lib.sizes <- colSums(counts)
+
+    fit <- selectFit(lib.sizes, "lnorm", verbose = verbose)
+
+    lib.loc <- unname(fit$estimate["meanlog"])
+    lib.scale <- unname(fit$estimate["sdlog"])
+
+    params <- setParams(params, lib.loc = lib.loc, lib.scale = lib.scale)
+
+    return(params)
+}
+
+selectFit <- function(data, distr, weights = NULL, verbose = TRUE) {
+
+    checkmate::assertNumeric(data, finite = TRUE, any.missing = FALSE)
+    checkmate::assertString(distr)
+    checkmate::assertNumeric(weights, finite = TRUE, any.missing = TRUE,
+                             len = length(data), null.ok = TRUE)
+    checkmate::assertFlag(verbose)
+
+    # Sink output that sometimes happens when fitting
+    sink(tempfile())
+    on.exit(sink())
+
+    fits <- list()
+
+    try(
+        fits$`MLE` <- fitdistrplus::fitdist(data, distr, method = "mle"),
+        silent = TRUE
+    )
+
+    try(
+        fits$`MME` <- fitdistrplus::fitdist(data, distr, method = "mme"),
+        silent = TRUE
+    )
+
+    try(
+        fits$`QME` <- fitdistrplus::fitdist(data, distr, method = "qme",
+                                            probs = c(1/3, 2/3)),
+        silent = TRUE
+    )
+
+    try(
+        fits$`MGE (CvM)` <- fitdistrplus::fitdist(data, distr, method = "mge",
+                                                  gof = "CvM"),
+        silent = TRUE
+    )
+
+    try(
+        fits$`MGE (KS)` <- fitdistrplus::fitdist(data, distr, method = "mge",
+                                                 gof = "KS"),
+        silent = TRUE
+    )
+
+    try(
+        fits$`MGE (AD)` <- fitdistrplus::fitdist(data, distr, method = "mge",
+                                                 gof = "AD"),
+        silent = TRUE
+    )
+
+    if (!is.null(weights)) {
+        try(suppressWarnings(
+            fits$`Weighted MLE` <- fitdistrplus::fitdist(data, distr,
+                                                         method = "mle",
+                                                         weights = weights)
+            ), silent = TRUE
+        )
+
+        try(suppressWarnings(
+            fits$`Weighted MME` <- fitdistrplus::fitdist(data, distr,
+                                                         method = "mme",
+                                                         weights = weights)
+            ), silent = TRUE
+        )
+
+        try(suppressWarnings(
+            fits$`Weighted QME` <- fitdistrplus::fitdist(data, distr,
+                                                         method = "qme",
+                                                         probs = c(1/3, 2/3),
+                                                         weights = weights)
+            ), silent = TRUE
+        )
+    }
+
+    aics <- fitdistrplus::gofstat(fits)$aic
+    selected <- which(aics == min(aics, na.rm = TRUE))
+
+    if (verbose) {
+        message("Selected ", names(fits)[selected], " fit using AIC")
+    }
+
+    return(fits[[selected]])
+}
diff --git a/man/splotchEstimate.Rd b/man/splotchEstimate.Rd
new file mode 100644
index 0000000..e35772f
--- /dev/null
+++ b/man/splotchEstimate.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splotch-estimate.R
+\name{splotchEstimate}
+\alias{splotchEstimate}
+\alias{splotchEstimate.SingleCellExperiment}
+\alias{splotchEstimate.matrix}
+\title{Estimate Splotch simulation parameters}
+\usage{
+splotchEstimate(counts, params = newSplotchParams(), verbose = TRUE)
+
+\method{splotchEstimate}{SingleCellExperiment}(counts,
+  params = newSplotchParams(), verbose = TRUE)
+
+\method{splotchEstimate}{matrix}(counts, params = newSplotchParams(),
+  verbose = TRUE)
+}
+\arguments{
+\item{counts}{either a counts matrix or a SingleCellExperiment object
+containing count data to estimate parameters from.}
+
+\item{params}{SplotchParams object to store estimated values in.}
+
+\item{verbose}{logical. Whether to print progress messages.}
+}
+\value{
+SplotchParams object containing the estimated parameters.
+}
+\description{
+Estimate simulation parameters for the Splotch simulation from a real
+dataset. See the individual estimation functions for more details on how this
+is done.
+}
+\examples{
+# Load example data
+library(scater)
+data("sc_example_counts")
+
+params <- splotchEstimate(sc_example_counts)
+params
+}
+\seealso{
+\code{\link{splotchEstMean}}, \code{\link{splotchEstLib}}
+}
-- 
GitLab