Skip to content
Snippets Groups Projects
Commit 53f052ae authored by l.zappia's avatar l.zappia
Browse files

Merge branch 'master' into devel

* master:
  Version 0.99.15
  Remove dropout.present estimation
  Add limits to zeros plots
  Adjust default splat DE fac params
  Add BPPARAM to scDDSimulate
  Add summariseDiff function

From: Luke Zappia <lazappi@users.noreply.github.com>

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/splatter@128698 bc3139a8-67e5-0310-9ffc-ced21a209358
parents 2baaba76 34c79e4f
No related branches found
No related tags found
No related merge requests found
Package: splatter Package: splatter
Type: Package Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 0.99.14 Version: 0.99.15
Date: 2017-03-28 Date: 2017-04-14
Author: Luke Zappia Author: Luke Zappia
Authors@R: Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"), c(person("Luke", "Zappia", role = c("aut", "cre"),
......
...@@ -39,6 +39,7 @@ export(splatSimulate) ...@@ -39,6 +39,7 @@ export(splatSimulate)
export(splatSimulateGroups) export(splatSimulateGroups)
export(splatSimulatePaths) export(splatSimulatePaths)
export(splatSimulateSingle) export(splatSimulateSingle)
export(summariseDiff)
exportClasses(Lun2Params) exportClasses(Lun2Params)
exportClasses(LunParams) exportClasses(LunParams)
exportClasses(SCDDParams) exportClasses(SCDDParams)
......
# 0.99.15
* Add summariseDiff function
* Add BPPARAM argument to scDDSimulate
* Adjust default Spalt DE factor parameters
* Add limits to zeros diff plots
* Remove estimation of dropout.present
# 0.99.14 # 0.99.14
* Add functions for making comparison panels * Add functions for making comparison panels
......
...@@ -207,8 +207,8 @@ setClass("SplatParams", ...@@ -207,8 +207,8 @@ setClass("SplatParams",
out.facScale = 0.5, out.facScale = 0.5,
de.prob = 0.1, de.prob = 0.1,
de.downProb = 0.5, de.downProb = 0.5,
de.facLoc = 4, de.facLoc = 0.1,
de.facScale = 1, de.facScale = 0.4,
bcv.common = 0.1, bcv.common = 0.1,
bcv.df = 60, bcv.df = 60,
dropout.present = FALSE, dropout.present = FALSE,
......
...@@ -111,7 +111,7 @@ setMethod("show", "SplatParams", function(object) { ...@@ -111,7 +111,7 @@ setMethod("show", "SplatParams", function(object) {
"[Scale]" = "de.facScale"), "[Scale]" = "de.facScale"),
"BCV:" = c("(Common Disp)" = "bcv.common", "BCV:" = c("(Common Disp)" = "bcv.common",
"(DoF)" = "bcv.df"), "(DoF)" = "bcv.df"),
"Dropout:" = c("(Present)" = "dropout.present", "Dropout:" = c("[Present]" = "dropout.present",
"(Midpoint)" = "dropout.mid", "(Midpoint)" = "dropout.mid",
"(Shape)" = "dropout.shape"), "(Shape)" = "dropout.shape"),
"Paths:" = c("[From]" = "path.from", "Paths:" = c("[From]" = "path.from",
......
...@@ -400,6 +400,7 @@ diffSCESets <- function(sces, ref, point.size = 0.1, point.alpha = 0.1, ...@@ -400,6 +400,7 @@ diffSCESets <- function(sces, ref, point.size = 0.1, point.alpha = 0.1,
colour = "Dataset")) + colour = "Dataset")) +
geom_hline(yintercept = 0, colour = "red") + geom_hline(yintercept = 0, colour = "red") +
geom_boxplot() + geom_boxplot() +
scale_y_continuous(limits = c(0, 100)) +
scale_colour_manual(values = colours) + scale_colour_manual(values = colours) +
ylab(paste("Rank difference percentage zeros")) + ylab(paste("Rank difference percentage zeros")) +
ggtitle("Difference in zeros per gene") + ggtitle("Difference in zeros per gene") +
...@@ -410,6 +411,7 @@ diffSCESets <- function(sces, ref, point.size = 0.1, point.alpha = 0.1, ...@@ -410,6 +411,7 @@ diffSCESets <- function(sces, ref, point.size = 0.1, point.alpha = 0.1,
colour = "Dataset")) + colour = "Dataset")) +
geom_hline(yintercept = 0, colour = "red") + geom_hline(yintercept = 0, colour = "red") +
geom_boxplot() + geom_boxplot() +
scale_y_continuous(limits = c(0, 100)) +
scale_colour_manual(values = colours) + scale_colour_manual(values = colours) +
ylab(paste("Rank difference percentage zeros")) + ylab(paste("Rank difference percentage zeros")) +
ggtitle("Difference in zeros per cell") + ggtitle("Difference in zeros per cell") +
...@@ -784,3 +786,66 @@ makeOverallPanel <- function(comp, diff, title = "Overall comparison", ...@@ -784,3 +786,66 @@ makeOverallPanel <- function(comp, diff, title = "Overall comparison",
return(panel) return(panel)
} }
#' Summarise diffSCESets
#'
#' Summarise the results of \code{\link{diffSCESets}}. The various
#' properties are sorted, differences calculated, the Median Absolute Deviation
#' taken as the summary statistic and the ranks calculated.
#'
#' @param diff Output from \code{\link{diffSCESets}}
#'
#' @return List with MADs, ranks and both combined in long format
#' @examples
#' sim1 <- splatSimulate(nGenes = 1000, groupCells = 20)
#' sim2 <- simpleSimulate(nGenes = 1000, nCells = 20)
#' difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple")
#' summary <- summariseDiff(difference)
#' names(summary)
#' head(summary$Long)
#' @export
summariseDiff <- function(diff) {
datasets <- unique(diff$PhenoData$Dataset)
fData.mads <- sapply(datasets, function(dataset) {
df <- diff$FeatureData[diff$FeatureData$Dataset == dataset, ]
mean <- median(abs(df$RankDiffMeanLogCPM))
var <- median(abs(df$RankDiffVarLogCPM))
zeros <- median(abs(df$RankDiffZeros))
mean.var <- median(abs(df$MeanRankVarDiff))
mean.zeros <- median(abs(df$MeanRankZerosDiff))
return(c(Mean = mean, Variance = var, ZerosGene = zeros,
MeanVar = mean.var, MeanZeros = mean.zeros))
})
pData.mads <- sapply(datasets, function(dataset) {
df <- diff$PhenoData[diff$PhenoData$Dataset == dataset, ]
lib.size <- median(abs(df$RankDiffLibSize))
zeros <- median(abs(df$RankDiffZeros))
return(c(LibSize = lib.size, ZerosCell = zeros))
})
mads <- data.frame(Dataset = datasets, t(fData.mads), t(pData.mads))
fData.ranks <- matrixStats::rowRanks(fData.mads)
pData.ranks <- matrixStats::rowRanks(pData.mads)
ranks <- data.frame(Dataset = datasets, t(fData.ranks), t(pData.ranks))
colnames(ranks) <- paste0(colnames(mads), "Rank")
mads.long <- stats::reshape(mads, varying = 2:8, direction = "long",
idvar = "Dataset", timevar = "Statistic",
times = colnames(mads)[2:8], v.names = "MAD")
ranks.long <- stats::reshape(ranks, varying = 2:8, direction = "long",
idvar = "Dataset", timevar = "Statistic",
times = colnames(ranks)[2:8], v.names = "Rank")
long <- data.frame(mads.long, Rank = ranks.long$Rank)
row.names(long) <- NULL
summary <- list(MADs = mads, Ranks = ranks, Long = long)
return(summary)
}
...@@ -7,6 +7,9 @@ ...@@ -7,6 +7,9 @@
#' plots. #' plots.
#' @param plot.file File path to save plots as PDF. #' @param plot.file File path to save plots as PDF.
#' @param verbose logical. Whether to print progress messages #' @param verbose logical. Whether to print progress messages
#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam}} instance
#' giving the parallel back-end to be used. Default is
#' \code{\link[BiocParallel]{SerialParam}} which uses a single core.
#' @param ... any additional parameter settings to override what is provided in #' @param ... any additional parameter settings to override what is provided in
#' \code{params}. #' \code{params}.
#' #'
...@@ -33,8 +36,10 @@ ...@@ -33,8 +36,10 @@
#' } #' }
#' @export #' @export
#' @importFrom scater newSCESet #' @importFrom scater newSCESet
#' @importFrom BiocParallel SerialParam
scDDSimulate <- function(params = newSCDDParams(), plots = FALSE, scDDSimulate <- function(params = newSCDDParams(), plots = FALSE,
plot.file = NULL, verbose = TRUE, ...) { plot.file = NULL, verbose = TRUE,
BPPARAM = SerialParam(), ...) {
checkmate::assertClass(params, "SCDDParams") checkmate::assertClass(params, "SCDDParams")
params <- setParams(params, ...) params <- setParams(params, ...)
...@@ -62,7 +67,8 @@ scDDSimulate <- function(params = newSCDDParams(), plots = FALSE, ...@@ -62,7 +67,8 @@ scDDSimulate <- function(params = newSCDDParams(), plots = FALSE,
plot.file = plot.file, plot.file = plot.file,
random.seed = getParam(params, "seed"), random.seed = getParam(params, "seed"),
varInflation = varInflation, varInflation = varInflation,
condition = getParam(params, "condition")) condition = getParam(params, "condition"),
param = BPPARAM)
} else { } else {
suppressMessages( suppressMessages(
scDD.sim <- scDD::simulateSet(SCdat = getParam(params, "SCdat"), scDD.sim <- scDD::simulateSet(SCdat = getParam(params, "SCdat"),
...@@ -79,7 +85,8 @@ scDDSimulate <- function(params = newSCDDParams(), plots = FALSE, ...@@ -79,7 +85,8 @@ scDDSimulate <- function(params = newSCDDParams(), plots = FALSE,
plot.file = plot.file, plot.file = plot.file,
random.seed = getParam(params, "seed"), random.seed = getParam(params, "seed"),
varInflation = varInflation, varInflation = varInflation,
condition = getParam(params, "condition")) condition = getParam(params, "condition"),
param = BPPARAM)
) )
} }
......
...@@ -195,8 +195,10 @@ splatEstBCV <- function(counts, params) { ...@@ -195,8 +195,10 @@ splatEstBCV <- function(counts, params) {
#' Estimate Splat dropout parameters #' Estimate Splat dropout parameters
#' #'
#' Estimate the midpoint and shape parameters for the logistic function used #' Estimate the midpoint and shape parameters for the logistic function used
#' when simulating dropout. Also estimates whether dropout is likely to be #' when simulating dropout.
#' present in the dataset. #'
# #' Also estimates whether dropout is likely to be
# #' present in the dataset.
#' #'
#' @param norm.counts library size normalised counts matrix. #' @param norm.counts library size normalised counts matrix.
#' @param params SplatParams object to store estimated values in. #' @param params SplatParams object to store estimated values in.
...@@ -204,17 +206,19 @@ splatEstBCV <- function(counts, params) { ...@@ -204,17 +206,19 @@ splatEstBCV <- function(counts, params) {
#' @details #' @details
#' Logistic function parameters are estimated by fitting a logistic function #' Logistic function parameters are estimated by fitting a logistic function
#' to the relationship between log2 mean gene expression and the proportion of #' to the relationship between log2 mean gene expression and the proportion of
#' zeros in each gene. See \code{\link[stats]{nls}} for details of fitting. The #' zeros in each gene. See \code{\link[stats]{nls}} for details of fitting.
#' presence of dropout is determined by comparing the observed number of zeros #'
#' in each gene to the expected number of zeros from a negative binomial # #' The
#' distribution with the gene mean and a dispersion of 0.1. If the maximum # #' presence of dropout is determined by comparing the observed number of zeros
#' difference between the observed number of zeros and the expected number is # #' in each gene to the expected number of zeros from a negative binomial
#' greater than 10 percent of the number of cells # #' distribution with the gene mean and a dispersion of 0.1. If the maximum
#' (\code{max(obs.zeros - exp.zeros) > 0.1 * ncol(norm.counts)}) then dropout is # #' difference between the observed number of zeros and the expected number is
#' considered to be present in the dataset. This is a somewhat crude measure # #' greater than 10 percent of the number of cells
#' but should give a reasonable indication. A more accurate approach is to look # #' (\code{max(obs.zeros - exp.zeros) > 0.1 * ncol(norm.counts)}) then dropout
#' at a plot of log2 mean expression vs the difference between observed and # #' is considered to be present in the dataset. This is a somewhat crude
#' expected number of zeros across all genes. # #' measure but should give a reasonable indication. A more accurate approach
# #' is to look at a plot of log2 mean expression vs the difference between
# #' observed and expected number of zeros across all genes.
#' #'
#' @return SplatParams object with estimated values. #' @return SplatParams object with estimated values.
#' #'
...@@ -234,15 +238,14 @@ splatEstDropout <- function(norm.counts, params) { ...@@ -234,15 +238,14 @@ splatEstDropout <- function(norm.counts, params) {
fit <- nls(y ~ logistic(x, x0 = x0, k = k), data = df, fit <- nls(y ~ logistic(x, x0 = x0, k = k), data = df,
start = list(x0 = 0, k = -1)) start = list(x0 = 0, k = -1))
exp.zeros <- dnbinom(0, mu = means, size = 1 / 0.1) * ncol(norm.counts) #exp.zeros <- dnbinom(0, mu = means, size = 1 / 0.1) * ncol(norm.counts)
present <- max(obs.zeros - exp.zeros) > 0.1 * ncol(norm.counts) #present <- max(obs.zeros - exp.zeros) > 0.1 * ncol(norm.counts)
mid <- summary(fit)$coefficients["x0", "Estimate"] mid <- summary(fit)$coefficients["x0", "Estimate"]
shape <- summary(fit)$coefficients["k", "Estimate"] shape <- summary(fit)$coefficients["k", "Estimate"]
params <- setParams(params, dropout.present = present, dropout.mid = mid, params <- setParams(params, dropout.mid = mid, dropout.shape = shape)
dropout.shape = shape)
return(params) return(params)
} }
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
\title{scDD simulation} \title{scDD simulation}
\usage{ \usage{
scDDSimulate(params = newSCDDParams(), plots = FALSE, plot.file = NULL, scDDSimulate(params = newSCDDParams(), plots = FALSE, plot.file = NULL,
verbose = TRUE, ...) verbose = TRUE, BPPARAM = SerialParam(), ...)
} }
\arguments{ \arguments{
\item{params}{SCDDParams object containing simulation parameters.} \item{params}{SCDDParams object containing simulation parameters.}
...@@ -17,6 +17,10 @@ plots.} ...@@ -17,6 +17,10 @@ plots.}
\item{verbose}{logical. Whether to print progress messages} \item{verbose}{logical. Whether to print progress messages}
\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam}} instance
giving the parallel back-end to be used. Default is
\code{\link[BiocParallel]{SerialParam}} which uses a single core.}
\item{...}{any additional parameter settings to override what is provided in \item{...}{any additional parameter settings to override what is provided in
\code{params}.} \code{params}.}
} }
......
...@@ -16,21 +16,10 @@ SplatParams object with estimated values. ...@@ -16,21 +16,10 @@ SplatParams object with estimated values.
} }
\description{ \description{
Estimate the midpoint and shape parameters for the logistic function used Estimate the midpoint and shape parameters for the logistic function used
when simulating dropout. Also estimates whether dropout is likely to be when simulating dropout.
present in the dataset.
} }
\details{ \details{
Logistic function parameters are estimated by fitting a logistic function Logistic function parameters are estimated by fitting a logistic function
to the relationship between log2 mean gene expression and the proportion of to the relationship between log2 mean gene expression and the proportion of
zeros in each gene. See \code{\link[stats]{nls}} for details of fitting. The zeros in each gene. See \code{\link[stats]{nls}} for details of fitting.
presence of dropout is determined by comparing the observed number of zeros
in each gene to the expected number of zeros from a negative binomial
distribution with the gene mean and a dispersion of 0.1. If the maximum
difference between the observed number of zeros and the expected number is
greater than 10 percent of the number of cells
(\code{max(obs.zeros - exp.zeros) > 0.1 * ncol(norm.counts)}) then dropout is
considered to be present in the dataset. This is a somewhat crude measure
but should give a reasonable indication. A more accurate approach is to look
at a plot of log2 mean expression vs the difference between observed and
expected number of zeros across all genes.
} }
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare.R
\name{summariseDiff}
\alias{summariseDiff}
\title{Summarise diffSCESets}
\usage{
summariseDiff(diff)
}
\arguments{
\item{diff}{Output from \code{\link{diffSCESets}}}
}
\value{
List with MADs, ranks and both combined in long format
}
\description{
Summarise the results of \code{\link{diffSCESets}}. The various
properties are sorted, differences calculated, the Median Absolute Deviation
taken as the summary statistic and the ranks calculated.
}
\examples{
sim1 <- splatSimulate(nGenes = 1000, groupCells = 20)
sim2 <- simpleSimulate(nGenes = 1000, nCells = 20)
difference <- diffSCESets(list(Splat = sim1, Simple = sim2), ref = "Simple")
summary <- summariseDiff(difference)
names(summary)
head(summary$Long)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment