From 66435625328e34d12750c855be8244cb96b298f2 Mon Sep 17 00:00:00 2001
From: Luke Zappia <lazappi@users.noreply.github.com>
Date: Thu, 10 Oct 2019 11:13:18 +1100
Subject: [PATCH] Document Splotch estimation functions

---
 DESCRIPTION               |  4 +-
 NAMESPACE                 |  8 ----
 NEWS.md                   |  4 ++
 R/SplotchParams-methods.R |  4 +-
 R/splat-estimate.R        |  4 +-
 R/splotch-estimate.R      | 79 ++++++++++++++++++++++++++++++++++++++-
 R/splotch-simulate.R      |  8 ----
 man/selectFit.Rd          | 28 ++++++++++++++
 man/splatEstLib.Rd        |  2 +-
 man/splatEstMean.Rd       |  2 +-
 man/splotchEstBCV.Rd      | 29 ++++++++++++++
 man/splotchEstLib.Rd      | 28 ++++++++++++++
 man/splotchEstMean.Rd     | 37 ++++++++++++++++++
 man/splotchEstimate.Rd    |  3 +-
 14 files changed, 214 insertions(+), 26 deletions(-)
 create mode 100644 man/selectFit.Rd
 create mode 100644 man/splotchEstBCV.Rd
 create mode 100644 man/splotchEstLib.Rd
 create mode 100644 man/splotchEstMean.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 463be27..707e1cf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 1.9.7.9017
-Date: 2019-10-09
+Version: 1.9.7.9018
+Date: 2019-10-10
 Author: Luke Zappia
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
diff --git a/NAMESPACE b/NAMESPACE
index 98b1372..b0d1768 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -67,16 +67,8 @@ export(splatSimulateGroups)
 export(splatSimulatePaths)
 export(splatSimulateSingle)
 export(splotchEstimate)
-export(splotchGenNetwork)
 export(splotchSample)
-export(splotchSelectRegs)
 export(splotchSetup)
-export(splotchSimAmbientCounts)
-export(splotchSimCellCounts)
-export(splotchSimCellMeans)
-export(splotchSimCounts)
-export(splotchSimGeneMeans)
-export(splotchSimLibSizes)
 export(splotchSimulate)
 export(summariseDiff)
 export(zinbEstimate)
diff --git a/NEWS.md b/NEWS.md
index 938037b..66387ad 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+### Version 1.9.7.9018 (2019-10-10)
+
+* Document Splotch estimation functions
+
 ### Version 1.9.7.9017 (2019-10-09)
 
 * Document Splotch simulation functions
diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R
index de38b02..46e572f 100644
--- a/R/SplotchParams-methods.R
+++ b/R/SplotchParams-methods.R
@@ -13,8 +13,8 @@ newSplotchParams <- function(...) {
 
     msg <- paste("The Splotch simulation is still experimental and may produce",
                  "unreliable results. Please try it and report any issues to",
-                 "https://github.com/Oshlack/splatter/issues. The development
-                 version may have improved features.")
+                 "https://github.com/Oshlack/splatter/issues. The development",
+                 "version may have improved features.")
     rlang:::warn_deprecated(msg, id = "warn.splotch")
 
     params <- new("SplotchParams")
diff --git a/R/splat-estimate.R b/R/splat-estimate.R
index 0d72013..459606d 100644
--- a/R/splat-estimate.R
+++ b/R/splat-estimate.R
@@ -70,7 +70,7 @@ splatEstimate.matrix <- function(counts, params = newSplatParams()) {
 #' @param params SplatParams object to store estimated values in.
 #'
 #' @details
-#' Parameter for the gamma distribution are estimated by fitting the mean
+#' Parameters for the gamma distribution are estimated by fitting the mean
 #' normalised counts using \code{\link[fitdistrplus]{fitdist}}. The 'maximum
 #' goodness-of-fit estimation' method is used to minimise the Cramer-von Mises
 #' distance. This can fail in some situations, in which case the 'method of
@@ -111,7 +111,7 @@ splatEstMean <- function(norm.counts, params) {
 #' @param counts counts matrix to estimate parameters from.
 #' @param params splatParams object to store estimated values in.
 #'
-#' @return splatParams object with estimated values.
+#' @return SplatParams object with estimated values.
 #'
 #' @importFrom stats shapiro.test
 splatEstLib <- function(counts, params) {
diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R
index d876d4d..9756bb3 100644
--- a/R/splotch-estimate.R
+++ b/R/splotch-estimate.R
@@ -10,7 +10,8 @@
 #' @param verbose logical. Whether to print progress messages.
 #'
 #' @seealso
-#' \code{\link{splotchEstMean}}, \code{\link{splotchEstLib}}
+#' \code{\link{splotchEstMean}}, \code{\link{splotchEstBCV}},
+#' \code{\link{splotchEstLib}}
 #'
 #' @return SplotchParams object containing the estimated parameters.
 #'
@@ -61,6 +62,33 @@ splotchEstimate.matrix <- function(counts, params = newSplotchParams(),
     return(params)
 }
 
+#' Estimate Splotch means
+#'
+#' Estimate mean parameters for the Splotch simulation
+#'
+#' @param norm.counts library size normalised counts matrix.
+#' @param params SplotchParams object to store estimated values in.
+#' @param verbose logical. Whether to print progress messages
+#'
+#' @details
+#' Parameters for the gamma distribution are estimated by fitting the mean
+#' normalised counts using \code{\link[fitdistrplus]{fitdist}}. All the fitting
+#' methods are tried and the fit with the best Cramer-von Mises statistic is
+#' selected. The density of the means is also estimated using
+#' \code{\link[stats]{density}}.
+#'
+#' Expression outlier genes are detected using the Median Absolute Deviation
+#' (MAD) from median method. If the log2 mean expression of a gene is greater
+#' than two MADs above the median log2 mean expression it is designated as an
+#' outlier. The proportion of outlier genes is used to estimate the outlier
+#' probability. Factors for each outlier gene are calculated by dividing mean
+#' expression by the median mean expression. A log-normal distribution is then
+#' fitted to these factors in order to estimate the outlier factor location and
+#' scale parameters using the \code{\link[fitdistrplus]{fitdist}} MLE method.
+#'
+#' @return SplotchParams object with estimated means
+#'
+#' @importFrom stats density
 splotchEstMean <- function(norm.counts, params, verbose) {
 
     if (verbose) {message("Estimating mean parameters...")}
@@ -102,6 +130,23 @@ splotchEstMean <- function(norm.counts, params, verbose) {
     return(params)
 }
 
+#' Estimate Splotch BCV parameters
+#'
+#' Estimate Biological Coefficient of Variation (BCV) parameters for the Splotch
+#' simulation
+#'
+#' @param counts counts matrix.
+#' @param params SplotchParams object to store estimated values in.
+#' @param verbose logical. Whether to print progress messages
+#'
+#' @details
+#' The \code{\link[edgeR]{estimateDisp}} function is used to estimate the common
+#' dispersion across the dataset. An exponential correction is applied based on
+#' fitting an exponential relationship between simulated and estimated values.
+#' If this results in a negative dispersion a simpler linear correction is
+#' applied instead.
+#'
+#' @return SplotchParams object with estimated BCV parameters
 splotchEstBCV <- function(counts, params, verbose) {
 
     if (verbose) {message("Estimating BCV parameters...")}
@@ -162,6 +207,23 @@ splotchEstBCV <- function(counts, params, verbose) {
     return(params)
 }
 
+#' Estimate Splotch library size parameters
+#'
+#' Estimate the library size parameters for the Splotch simulation
+#'
+#' @param counts counts matrix.
+#' @param params SplotchParams object to store estimated values in.
+#' @param verbose logical. Whether to print progress messages
+#'
+#' @details
+#' Parameters for the log-normal distribution are estimated by fitting the
+#' library sizes using \code{\link[fitdistrplus]{fitdist}}. All the fitting
+#' methods are tried and the fit with the best Cramer-von Mises statistic is
+#' selected. The density of the library sizes is also estimated using
+#' \code{\link[stats]{density}}.
+#'
+#' @return SplotchParams object with library size parameters
+#'
 #' @importFrom stats density
 splotchEstLib <- function(counts, params, verbose) {
 
@@ -182,6 +244,21 @@ splotchEstLib <- function(counts, params, verbose) {
     return(params)
 }
 
+#' Select fit
+#'
+#' Try a variety of fitting methods and select the best one
+#'
+#' @param data The data to fit
+#' @param distr Name of the distribution to fit
+#' @param weights Optional vector of weights
+#' @param verbose logical. Whether to print progress messages
+#'
+#' @details
+#' The distribution is fitted to the data using each of the
+#' \code{\link[fitdistrplus]{fitdist}} fitting methods. The fit with the
+#' smallest Cramer-von Mises statistic is selected.
+#'
+#' @return The selected fit object
 selectFit <- function(data, distr, weights = NULL, verbose = TRUE) {
 
     checkmate::assertNumeric(data, finite = TRUE, any.missing = FALSE)
diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R
index 1b73183..9e49571 100644
--- a/R/splotch-simulate.R
+++ b/R/splotch-simulate.R
@@ -256,7 +256,6 @@ splotchSample <- function(params, verbose = TRUE) {
 #' function and edge weights are sampled from a standard normal distribution.
 #'
 #' @return SplotchParams object with gene network
-#' @export
 splotchGenNetwork <- function(params, verbose) {
 
     nGenes <- getParam(params, "nGenes")
@@ -293,7 +292,6 @@ splotchGenNetwork <- function(params, verbose) {
 #' may be improved or replace in the future.
 #'
 #' @return SplotchParams object with gene regulators
-#' @export
 splotchSelectRegs <- function(params, verbose) {
 
     network.regsSet <- getParam(params, "network.regsSet")
@@ -344,7 +342,6 @@ splotchSelectRegs <- function(params, verbose) {
 #' statistical assumptions.
 #'
 #' @return SplotchParams object with gene means
-#' @export
 splotchSimGeneMeans <- function(params, verbose) {
 
     mean.values <- getParam(params, "mean.values")
@@ -522,7 +519,6 @@ splotchSimPaths <- function(params, verbose) {
 #' \code{ambient.scale} parameter.
 #'
 #' @return SingleCellExperiment with library sizes
-#' @export
 splotchSimLibSizes <- function(sim, params, verbose) {
 
     if (verbose) {message("Simulating library sizes...")}
@@ -610,7 +606,6 @@ splotchSimLibSizes <- function(sim, params, verbose) {
 #' the means.
 #'
 #' @return SingleCellExperiment with cell means
-#' @export
 splotchSimCellMeans <- function(sim, params, verbose) {
 
     cell.names <- colData(sim)$Cell
@@ -752,7 +747,6 @@ splotchSimCellMeans <- function(sim, params, verbose) {
 #' cell means matrix.
 #'
 #' @return SingleCellExperiment with cell counts
-#' @export
 splotchSimCellCounts <- function(sim, params, verbose) {
 
     if (verbose) {message("Simulating cell counts...")}
@@ -788,7 +782,6 @@ splotchSimCellCounts <- function(sim, params, verbose) {
 #' distribution using these means.
 #'
 #' @return SingleCellExperiment with ambient counts
-#' @export
 splotchSimAmbientCounts <- function(sim, params, verbose) {
 
     if (verbose) {message("Simulating ambient counts...")}
@@ -836,7 +829,6 @@ splotchSimAmbientCounts <- function(sim, params, verbose) {
 #' @seealso \code{\link[DropletUtils]{downsampleMatrix}}
 #'
 #' @return SingleCellExperiment with counts matrix
-#' @export
 splotchSimCounts <- function(sim, params, verbose) {
 
     if (verbose) {message("Simulating final counts...")}
diff --git a/man/selectFit.Rd b/man/selectFit.Rd
new file mode 100644
index 0000000..cb202d2
--- /dev/null
+++ b/man/selectFit.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splotch-estimate.R
+\name{selectFit}
+\alias{selectFit}
+\title{Select fit}
+\usage{
+selectFit(data, distr, weights = NULL, verbose = TRUE)
+}
+\arguments{
+\item{data}{The data to fit}
+
+\item{distr}{Name of the distribution to fit}
+
+\item{weights}{Optional vector of weights}
+
+\item{verbose}{logical. Whether to print progress messages}
+}
+\value{
+The selected fit object
+}
+\description{
+Try a variety of fitting methods and select the best one
+}
+\details{
+The distribution is fitted to the data using each of the
+\code{\link[fitdistrplus]{fitdist}} fitting methods. The fit with the
+smallest Cramer-von Mises statistic is selected.
+}
diff --git a/man/splatEstLib.Rd b/man/splatEstLib.Rd
index 30df262..29f69c1 100644
--- a/man/splatEstLib.Rd
+++ b/man/splatEstLib.Rd
@@ -12,7 +12,7 @@ splatEstLib(counts, params)
 \item{params}{splatParams object to store estimated values in.}
 }
 \value{
-splatParams object with estimated values.
+SplatParams object with estimated values.
 }
 \description{
 The Shapiro-Wilks test is used to determine if the library sizes are
diff --git a/man/splatEstMean.Rd b/man/splatEstMean.Rd
index c019f67..a9192df 100644
--- a/man/splatEstMean.Rd
+++ b/man/splatEstMean.Rd
@@ -19,7 +19,7 @@ Estimate rate and shape parameters for the gamma distribution used to
 simulate gene expression means.
 }
 \details{
-Parameter for the gamma distribution are estimated by fitting the mean
+Parameters for the gamma distribution are estimated by fitting the mean
 normalised counts using \code{\link[fitdistrplus]{fitdist}}. The 'maximum
 goodness-of-fit estimation' method is used to minimise the Cramer-von Mises
 distance. This can fail in some situations, in which case the 'method of
diff --git a/man/splotchEstBCV.Rd b/man/splotchEstBCV.Rd
new file mode 100644
index 0000000..ebca37c
--- /dev/null
+++ b/man/splotchEstBCV.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splotch-estimate.R
+\name{splotchEstBCV}
+\alias{splotchEstBCV}
+\title{Estimate Splotch BCV parameters}
+\usage{
+splotchEstBCV(counts, params, verbose)
+}
+\arguments{
+\item{counts}{counts matrix.}
+
+\item{params}{SplotchParams object to store estimated values in.}
+
+\item{verbose}{logical. Whether to print progress messages}
+}
+\value{
+SplotchParams object with estimated BCV parameters
+}
+\description{
+Estimate Biological Coefficient of Variation (BCV) parameters for the Splotch
+simulation
+}
+\details{
+The \code{\link[edgeR]{estimateDisp}} function is used to estimate the common
+dispersion across the dataset. An exponential correction is applied based on
+fitting an exponential relationship between simulated and estimated values.
+If this results in a negative dispersion a simpler linear correction is
+applied instead.
+}
diff --git a/man/splotchEstLib.Rd b/man/splotchEstLib.Rd
new file mode 100644
index 0000000..b606d31
--- /dev/null
+++ b/man/splotchEstLib.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splotch-estimate.R
+\name{splotchEstLib}
+\alias{splotchEstLib}
+\title{Estimate Splotch library size parameters}
+\usage{
+splotchEstLib(counts, params, verbose)
+}
+\arguments{
+\item{counts}{counts matrix.}
+
+\item{params}{SplotchParams object to store estimated values in.}
+
+\item{verbose}{logical. Whether to print progress messages}
+}
+\value{
+SplotchParams object with library size parameters
+}
+\description{
+Estimate the library size parameters for the Splotch simulation
+}
+\details{
+Parameters for the log-normal distribution are estimated by fitting the
+library sizes using \code{\link[fitdistrplus]{fitdist}}. All the fitting
+methods are tried and the fit with the best Cramer-von Mises statistic is
+selected. The density of the library sizes is also estimated using
+\code{\link[stats]{density}}.
+}
diff --git a/man/splotchEstMean.Rd b/man/splotchEstMean.Rd
new file mode 100644
index 0000000..46235b1
--- /dev/null
+++ b/man/splotchEstMean.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splotch-estimate.R
+\name{splotchEstMean}
+\alias{splotchEstMean}
+\title{Estimate Splotch means}
+\usage{
+splotchEstMean(norm.counts, params, verbose)
+}
+\arguments{
+\item{norm.counts}{library size normalised counts matrix.}
+
+\item{params}{SplotchParams object to store estimated values in.}
+
+\item{verbose}{logical. Whether to print progress messages}
+}
+\value{
+SplotchParams object with estimated means
+}
+\description{
+Estimate mean parameters for the Splotch simulation
+}
+\details{
+Parameters for the gamma distribution are estimated by fitting the mean
+normalised counts using \code{\link[fitdistrplus]{fitdist}}. All the fitting
+methods are tried and the fit with the best Cramer-von Mises statistic is
+selected. The density of the means is also estimated using
+\code{\link[stats]{density}}.
+
+Expression outlier genes are detected using the Median Absolute Deviation
+(MAD) from median method. If the log2 mean expression of a gene is greater
+than two MADs above the median log2 mean expression it is designated as an
+outlier. The proportion of outlier genes is used to estimate the outlier
+probability. Factors for each outlier gene are calculated by dividing mean
+expression by the median mean expression. A log-normal distribution is then
+fitted to these factors in order to estimate the outlier factor location and
+scale parameters using the \code{\link[fitdistrplus]{fitdist}} MLE method.
+}
diff --git a/man/splotchEstimate.Rd b/man/splotchEstimate.Rd
index d6cc25f..98e384a 100644
--- a/man/splotchEstimate.Rd
+++ b/man/splotchEstimate.Rd
@@ -40,5 +40,6 @@ params <- splotchEstimate(sce)
 params
 }
 \seealso{
-\code{\link{splotchEstMean}}, \code{\link{splotchEstLib}}
+\code{\link{splotchEstMean}}, \code{\link{splotchEstBCV}},
+\code{\link{splotchEstLib}}
 }
-- 
GitLab