From 66435625328e34d12750c855be8244cb96b298f2 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Thu, 10 Oct 2019 11:13:18 +1100 Subject: [PATCH] Document Splotch estimation functions --- DESCRIPTION | 4 +- NAMESPACE | 8 ---- NEWS.md | 4 ++ R/SplotchParams-methods.R | 4 +- R/splat-estimate.R | 4 +- R/splotch-estimate.R | 79 ++++++++++++++++++++++++++++++++++++++- R/splotch-simulate.R | 8 ---- man/selectFit.Rd | 28 ++++++++++++++ man/splatEstLib.Rd | 2 +- man/splatEstMean.Rd | 2 +- man/splotchEstBCV.Rd | 29 ++++++++++++++ man/splotchEstLib.Rd | 28 ++++++++++++++ man/splotchEstMean.Rd | 37 ++++++++++++++++++ man/splotchEstimate.Rd | 3 +- 14 files changed, 214 insertions(+), 26 deletions(-) create mode 100644 man/selectFit.Rd create mode 100644 man/splotchEstBCV.Rd create mode 100644 man/splotchEstLib.Rd create mode 100644 man/splotchEstMean.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 463be27..707e1cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.7.9017 -Date: 2019-10-09 +Version: 1.9.7.9018 +Date: 2019-10-10 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 98b1372..b0d1768 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -67,16 +67,8 @@ export(splatSimulateGroups) export(splatSimulatePaths) export(splatSimulateSingle) export(splotchEstimate) -export(splotchGenNetwork) export(splotchSample) -export(splotchSelectRegs) export(splotchSetup) -export(splotchSimAmbientCounts) -export(splotchSimCellCounts) -export(splotchSimCellMeans) -export(splotchSimCounts) -export(splotchSimGeneMeans) -export(splotchSimLibSizes) export(splotchSimulate) export(summariseDiff) export(zinbEstimate) diff --git a/NEWS.md b/NEWS.md index 938037b..66387ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +### Version 1.9.7.9018 (2019-10-10) + +* Document Splotch estimation 
functions + ### Version 1.9.7.9017 (2019-10-09) * Document Splotch simulation functions diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index de38b02..46e572f 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -13,8 +13,8 @@ newSplotchParams <- function(...) { msg <- paste("The Splotch simulation is still experimental and may produce", "unreliable results. Please try it and report any issues to", - "https://github.com/Oshlack/splatter/issues. The development - version may have improved features.") + "https://github.com/Oshlack/splatter/issues. The development", + "version may have improved features.") rlang:::warn_deprecated(msg, id = "warn.splotch") params <- new("SplotchParams") diff --git a/R/splat-estimate.R b/R/splat-estimate.R index 0d72013..459606d 100644 --- a/R/splat-estimate.R +++ b/R/splat-estimate.R @@ -70,7 +70,7 @@ splatEstimate.matrix <- function(counts, params = newSplatParams()) { #' @param params SplatParams object to store estimated values in. #' #' @details -#' Parameter for the gamma distribution are estimated by fitting the mean +#' Parameters for the gamma distribution are estimated by fitting the mean #' normalised counts using \code{\link[fitdistrplus]{fitdist}}. The 'maximum #' goodness-of-fit estimation' method is used to minimise the Cramer-von Mises #' distance. This can fail in some situations, in which case the 'method of @@ -111,7 +111,7 @@ splatEstMean <- function(norm.counts, params) { #' @param counts counts matrix to estimate parameters from. #' @param params splatParams object to store estimated values in. #' -#' @return splatParams object with estimated values. +#' @return SplatParams object with estimated values. #' #' @importFrom stats shapiro.test splatEstLib <- function(counts, params) { diff --git a/R/splotch-estimate.R b/R/splotch-estimate.R index d876d4d..9756bb3 100644 --- a/R/splotch-estimate.R +++ b/R/splotch-estimate.R @@ -10,7 +10,8 @@ #' @param verbose logical. 
Whether to print progress messages. #' #' @seealso -#' \code{\link{splotchEstMean}}, \code{\link{splotchEstLib}} +#' \code{\link{splotchEstMean}}, \code{\link{splotchEstBCV}}, +#' \code{\link{splotchEstLib}} #' #' @return SplotchParams object containing the estimated parameters. #' @@ -61,6 +62,33 @@ splotchEstimate.matrix <- function(counts, params = newSplotchParams(), return(params) } +#' Estimate Splotch means +#' +#' Estimate mean parameters for the Splotch simulation +#' +#' @param norm.counts library size normalised counts matrix. +#' @param params SplotchParams object to store estimated values in. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Parameters for the gamma distribution are estimated by fitting the mean +#' normalised counts using \code{\link[fitdistrplus]{fitdist}}. All the fitting +#' methods are tried and the fit with the best Cramer-von Mises statistic is +#' selected. The density of the means is also estimated using +#' \code{\link[stats]{density}}. +#' +#' Expression outlier genes are detected using the Median Absolute Deviation +#' (MAD) from median method. If the log2 mean expression of a gene is greater +#' than two MADs above the median log2 mean expression it is designated as an +#' outlier. The proportion of outlier genes is used to estimate the outlier +#' probability. Factors for each outlier gene are calculated by dividing mean +#' expression by the median mean expression. A log-normal distribution is then +#' fitted to these factors in order to estimate the outlier factor location and +#' scale parameters using the \code{\link[fitdistrplus]{fitdist}} MLE method. 
+#' +#' @return SplotchParams object with estimated means +#' +#' @importFrom stats density splotchEstMean <- function(norm.counts, params, verbose) { if (verbose) {message("Estimating mean parameters...")} @@ -102,6 +130,23 @@ splotchEstMean <- function(norm.counts, params, verbose) { return(params) } +#' Estimate Splotch BCV parameters +#' +#' Estimate Biological Coefficient of Variation (BCV) parameters for the Splotch +#' simulation +#' +#' @param counts counts matrix. +#' @param params SplotchParams object to store estimated values in. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' The \code{\link[edgeR]{estimateDisp}} function is used to estimate the common +#' dispersion across the dataset. An exponential correction is applied based on +#' fitting an exponential relationship between simulated and estimated values. +#' If this results in a negative dispersion a simpler linear correction is +#' applied instead. +#' +#' @return SplotchParams object with estimated BCV parameters splotchEstBCV <- function(counts, params, verbose) { if (verbose) {message("Estimating BCV parameters...")} @@ -162,6 +207,23 @@ splotchEstBCV <- function(counts, params, verbose) { return(params) } +#' Estimate Splotch library size parameters +#' +#' Estimate the library size parameters for the Splotch simulation +#' +#' @param counts counts matrix. +#' @param params SplotchParams object to store estimated values in. +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' Parameters for the log-normal distribution are estimated by fitting the +#' library sizes using \code{\link[fitdistrplus]{fitdist}}. All the fitting +#' methods are tried and the fit with the best Cramer-von Mises statistic is +#' selected. The density of the library sizes is also estimated using +#' \code{\link[stats]{density}}. 
+#' +#' @return SplotchParams object with library size parameters +#' #' @importFrom stats density splotchEstLib <- function(counts, params, verbose) { @@ -182,6 +244,21 @@ splotchEstLib <- function(counts, params, verbose) { return(params) } +#' Select fit +#' +#' Try a variety of fitting methods and select the best one +#' +#' @param data The data to fit +#' @param distr Name of the distribution to fit +#' @param weights Optional vector of weights +#' @param verbose logical. Whether to print progress messages +#' +#' @details +#' The distribution is fitted to the data using each of the +#' \code{\link[fitdistrplus]{fitdist}} fitting methods. The fit with the +#' smallest Cramer-von Mises statistic is selected. +#' +#' @return The selected fit object selectFit <- function(data, distr, weights = NULL, verbose = TRUE) { checkmate::assertNumeric(data, finite = TRUE, any.missing = FALSE) diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index 1b73183..9e49571 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -256,7 +256,6 @@ splotchSample <- function(params, verbose = TRUE) { #' function and edge weights are sampled from a standard normal distribution. #' #' @return SplotchParams object with gene network -#' @export splotchGenNetwork <- function(params, verbose) { nGenes <- getParam(params, "nGenes") @@ -293,7 +292,6 @@ splotchGenNetwork <- function(params, verbose) { #' may be improved or replace in the future. #' #' @return SplotchParams object with gene regulators -#' @export splotchSelectRegs <- function(params, verbose) { network.regsSet <- getParam(params, "network.regsSet") @@ -344,7 +342,6 @@ splotchSelectRegs <- function(params, verbose) { #' statistical assumptions. #' #' @return SplotchParams object with gene means -#' @export splotchSimGeneMeans <- function(params, verbose) { mean.values <- getParam(params, "mean.values") @@ -522,7 +519,6 @@ splotchSimPaths <- function(params, verbose) { #' \code{ambient.scale} parameter. 
#' #' @return SingleCellExperiment with library sizes -#' @export splotchSimLibSizes <- function(sim, params, verbose) { if (verbose) {message("Simulating library sizes...")} @@ -610,7 +606,6 @@ splotchSimLibSizes <- function(sim, params, verbose) { #' the means. #' #' @return SingleCellExperiment with cell means -#' @export splotchSimCellMeans <- function(sim, params, verbose) { cell.names <- colData(sim)$Cell @@ -752,7 +747,6 @@ splotchSimCellMeans <- function(sim, params, verbose) { #' cell means matrix. #' #' @return SingleCellExperiment with cell counts -#' @export splotchSimCellCounts <- function(sim, params, verbose) { if (verbose) {message("Simulating cell counts...")} @@ -788,7 +782,6 @@ splotchSimCellCounts <- function(sim, params, verbose) { #' distribution using these means. #' #' @return SingleCellExperiment with ambient counts -#' @export splotchSimAmbientCounts <- function(sim, params, verbose) { if (verbose) {message("Simulating ambient counts...")} @@ -836,7 +829,6 @@ splotchSimAmbientCounts <- function(sim, params, verbose) { #' @seealso \code{\link[DropletUtils]{downsampleMatrix}} #' #' @return SingleCellExperiment with counts matrix -#' @export splotchSimCounts <- function(sim, params, verbose) { if (verbose) {message("Simulating final counts...")} diff --git a/man/selectFit.Rd b/man/selectFit.Rd new file mode 100644 index 0000000..cb202d2 --- /dev/null +++ b/man/selectFit.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-estimate.R +\name{selectFit} +\alias{selectFit} +\title{Select fit} +\usage{ +selectFit(data, distr, weights = NULL, verbose = TRUE) +} +\arguments{ +\item{data}{The data to fit} + +\item{distr}{Name of the distribution to fit} + +\item{weights}{Optional vector of weights} + +\item{verbose}{logical. 
Whether to print progress messages} +} +\value{ +The selected fit object +} +\description{ +Try a variety of fitting methods and select the best one +} +\details{ +The distribution is fitted to the data using each of the +\code{\link[fitdistrplus]{fitdist}} fitting methods. The fit with the +smallest Cramer-von Mises statistic is selected. +} diff --git a/man/splatEstLib.Rd b/man/splatEstLib.Rd index 30df262..29f69c1 100644 --- a/man/splatEstLib.Rd +++ b/man/splatEstLib.Rd @@ -12,7 +12,7 @@ splatEstLib(counts, params) \item{params}{splatParams object to store estimated values in.} } \value{ -splatParams object with estimated values. +SplatParams object with estimated values. } \description{ The Shapiro-Wilks test is used to determine if the library sizes are diff --git a/man/splatEstMean.Rd b/man/splatEstMean.Rd index c019f67..a9192df 100644 --- a/man/splatEstMean.Rd +++ b/man/splatEstMean.Rd @@ -19,7 +19,7 @@ Estimate rate and shape parameters for the gamma distribution used to simulate gene expression means. } \details{ -Parameter for the gamma distribution are estimated by fitting the mean +Parameters for the gamma distribution are estimated by fitting the mean normalised counts using \code{\link[fitdistrplus]{fitdist}}. The 'maximum goodness-of-fit estimation' method is used to minimise the Cramer-von Mises distance. This can fail in some situations, in which case the 'method of diff --git a/man/splotchEstBCV.Rd b/man/splotchEstBCV.Rd new file mode 100644 index 0000000..ebca37c --- /dev/null +++ b/man/splotchEstBCV.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-estimate.R +\name{splotchEstBCV} +\alias{splotchEstBCV} +\title{Estimate Splotch BCV parameters} +\usage{ +splotchEstBCV(counts, params, verbose) +} +\arguments{ +\item{counts}{counts matrix.} + +\item{params}{SplotchParams object to store estimated values in.} + +\item{verbose}{logical. 
Whether to print progress messages} +} +\value{ +SplotchParams object with estimated BCV parameters +} +\description{ +Estimate Biological Coefficient of Variation (BCV) parameters for the Splotch +simulation +} +\details{ +The \code{\link[edgeR]{estimateDisp}} function is used to estimate the common +dispersion across the dataset. An exponential correction is applied based on +fitting an exponential relationship between simulated and estimated values. +If this results in a negative dispersion a simpler linear correction is +applied instead. +} diff --git a/man/splotchEstLib.Rd b/man/splotchEstLib.Rd new file mode 100644 index 0000000..b606d31 --- /dev/null +++ b/man/splotchEstLib.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-estimate.R +\name{splotchEstLib} +\alias{splotchEstLib} +\title{Estimate Splotch library size parameters} +\usage{ +splotchEstLib(counts, params, verbose) +} +\arguments{ +\item{counts}{counts matrix.} + +\item{params}{SplotchParams object to store estimated values in.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SplotchParams object with library size parameters +} +\description{ +Estimate the library size parameters for the Splotch simulation +} +\details{ +Parameters for the log-normal distribution are estimated by fitting the +library sizes using \code{\link[fitdistrplus]{fitdist}}. All the fitting +methods are tried and the fit with the best Cramer-von Mises statistic is +selected. The density of the library sizes is also estimated using +\code{\link[stats]{density}}. 
+} diff --git a/man/splotchEstMean.Rd b/man/splotchEstMean.Rd new file mode 100644 index 0000000..46235b1 --- /dev/null +++ b/man/splotchEstMean.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/splotch-estimate.R +\name{splotchEstMean} +\alias{splotchEstMean} +\title{Estimate Splotch means} +\usage{ +splotchEstMean(norm.counts, params, verbose) +} +\arguments{ +\item{norm.counts}{library size normalised counts matrix.} + +\item{params}{SplotchParams object to store estimated values in.} + +\item{verbose}{logical. Whether to print progress messages} +} +\value{ +SplotchParams object with estimated means +} +\description{ +Estimate mean parameters for the Splotch simulation +} +\details{ +Parameters for the gamma distribution are estimated by fitting the mean +normalised counts using \code{\link[fitdistrplus]{fitdist}}. All the fitting +methods are tried and the fit with the best Cramer-von Mises statistic is +selected. The density of the means is also estimated using +\code{\link[stats]{density}}. + +Expression outlier genes are detected using the Median Absolute Deviation +(MAD) from median method. If the log2 mean expression of a gene is greater +than two MADs above the median log2 mean expression it is designated as an +outlier. The proportion of outlier genes is used to estimate the outlier +probability. Factors for each outlier gene are calculated by dividing mean +expression by the median mean expression. A log-normal distribution is then +fitted to these factors in order to estimate the outlier factor location and +scale parameters using the \code{\link[fitdistrplus]{fitdist}} MLE method. 
+} diff --git a/man/splotchEstimate.Rd b/man/splotchEstimate.Rd index d6cc25f..98e384a 100644 --- a/man/splotchEstimate.Rd +++ b/man/splotchEstimate.Rd @@ -40,5 +40,6 @@ params <- splotchEstimate(sce) params } \seealso{ -\code{\link{splotchEstMean}}, \code{\link{splotchEstLib}} +\code{\link{splotchEstMean}}, \code{\link{splotchEstBCV}}, +\code{\link{splotchEstLib}} } -- GitLab