Skip to content
Snippets Groups Projects
Commit 1166437b authored by Luke Zappia's avatar Luke Zappia
Browse files

Merge branch 'master' into devel

* master:
  Version 0.99.11
  Run checks
  Fix mean-zeros scale
  Make lun2Estimate parallel
  Allow non-interger Lun 2 lib sizes
  Fix dropout eta

From: Luke Zappia <lazappi@users.noreply.github.com>

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/splatter@127511 bc3139a8-67e5-0310-9ffc-ced21a209358
parent cc0bbdea
No related branches found
No related tags found
No related merge requests found
Package: splatter Package: splatter
Type: Package Type: Package
Title: Simple Simulation of Single-cell RNA Sequencing Data Title: Simple Simulation of Single-cell RNA Sequencing Data
Version: 0.99.10 Version: 0.99.11
Date: 2017-03-07 Date: 2017-03-20
Author: Luke Zappia Author: Luke Zappia
Authors@R: Authors@R:
c(person("Luke", "Zappia", role = c("aut", "cre"), c(person("Luke", "Zappia", role = c("aut", "cre"),
...@@ -32,7 +32,8 @@ Imports: ...@@ -32,7 +32,8 @@ Imports:
utils, utils,
matrixStats, matrixStats,
ggplot2, ggplot2,
scales scales,
BiocParallel
Suggests: Suggests:
testthat, testthat,
scran, scran,
......
...@@ -45,6 +45,8 @@ importFrom(Biobase,"pData<-") ...@@ -45,6 +45,8 @@ importFrom(Biobase,"pData<-")
importFrom(Biobase,assayData) importFrom(Biobase,assayData)
importFrom(Biobase,fData) importFrom(Biobase,fData)
importFrom(Biobase,pData) importFrom(Biobase,pData)
importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bplapply)
importFrom(checkmate,checkCharacter) importFrom(checkmate,checkCharacter)
importFrom(checkmate,checkClass) importFrom(checkmate,checkClass)
importFrom(checkmate,checkFactor) importFrom(checkmate,checkFactor)
...@@ -61,6 +63,7 @@ importFrom(ggplot2,geom_smooth) ...@@ -61,6 +63,7 @@ importFrom(ggplot2,geom_smooth)
importFrom(ggplot2,geom_violin) importFrom(ggplot2,geom_violin)
importFrom(ggplot2,ggplot) importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle) importFrom(ggplot2,ggtitle)
importFrom(ggplot2,scale_x_log10)
importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,scale_y_log10) importFrom(ggplot2,scale_y_log10)
importFrom(ggplot2,theme_minimal) importFrom(ggplot2,theme_minimal)
......
# 0.99.11
* Add parallel option to lun2Estimate
* Allow non-integer library sizes in Lun2Params
* Adjust dropout eta value
* Adjust comparison plots
# 0.99.10 # 0.99.10
* Improve Splat estimation procedure * Improve Splat estimation procedure
......
...@@ -9,8 +9,8 @@ newLun2Params <- function(...) { ...@@ -9,8 +9,8 @@ newLun2Params <- function(...) {
return(params) return(params)
} }
#' @importFrom checkmate checkInt checkIntegerish checkNumber checkNumeric #' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical
#' checkLogical checkCharacter checkFactor #' checkCharacter checkFactor
setValidity("Lun2Params", function(object) { setValidity("Lun2Params", function(object) {
v <- getParams(object, slotNames(object)) v <- getParams(object, slotNames(object))
...@@ -36,8 +36,8 @@ setValidity("Lun2Params", function(object) { ...@@ -36,8 +36,8 @@ setValidity("Lun2Params", function(object) {
gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0, gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0,
len = nGenes), len = nGenes),
cell.plates = checkFactor(v$cell.plates, len = nCells), cell.plates = checkFactor(v$cell.plates, len = nCells),
cell.libSizes = checkIntegerish(v$cell.libSizes, lower = 0, cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0,
len = nCells), len = nCells),
cell.libMod = checkNumber(v$cell.libMod, lower = 0), cell.libMod = checkNumber(v$cell.libMod, lower = 0),
de.nGene = checkInt(v$de.nGenes, lower = 0), de.nGene = checkInt(v$de.nGenes, lower = 0),
de.fc = checkNumber(v$de.fc, lower = 0)) de.fc = checkNumber(v$de.fc, lower = 0))
......
...@@ -44,7 +44,8 @@ ...@@ -44,7 +44,8 @@
#' names(comparison) #' names(comparison)
#' names(comparison$Plots) #' names(comparison$Plots)
#' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth geom_boxplot #' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth geom_boxplot
#' geom_violin scale_y_continuous scale_y_log10 xlab ylab ggtitle theme_minimal #' geom_violin scale_y_continuous scale_y_log10 scale_x_log10 xlab ylab ggtitle
#' theme_minimal
#' @importFrom scater cpm<- #' @importFrom scater cpm<-
#' @export #' @export
compareSCESets <- function(sces) { compareSCESets <- function(sces) {
...@@ -98,7 +99,7 @@ compareSCESets <- function(sces) { ...@@ -98,7 +99,7 @@ compareSCESets <- function(sces) {
mean.var <- ggplot(fData.all, mean.var <- ggplot(fData.all,
aes_string(x = "mean_log_cpm", y = "var_log_cpm", aes_string(x = "mean_log_cpm", y = "var_log_cpm",
colour = "Dataset", fill = "Dataset")) + colour = "Dataset", fill = "Dataset")) +
geom_point(alpha = 0.2) + geom_point(size = 0.1, alpha = 0.1) +
geom_smooth() + geom_smooth() +
xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) + xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) +
ylab(expression(paste("Variance ", log[2], "(CPM + 1)"))) + ylab(expression(paste("Variance ", log[2], "(CPM + 1)"))) +
...@@ -133,12 +134,13 @@ compareSCESets <- function(sces) { ...@@ -133,12 +134,13 @@ compareSCESets <- function(sces) {
theme_minimal() theme_minimal()
mean.zeros <- ggplot(fData.all, mean.zeros <- ggplot(fData.all,
aes_string(x = "mean_log_cpm", y = "pct_dropout", aes_string(x = "mean_counts", y = "pct_dropout",
colour = "Dataset", fill = "Dataset")) + colour = "Dataset", fill = "Dataset")) +
geom_point(alpha = 0.2) + geom_point(size = 0.1, alpha = 0.1) +
geom_smooth() + geom_smooth() +
xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) + scale_x_log10(labels = scales::comma) +
ylab(expression(paste("Percentage zeros"))) + xlab("Mean count") +
ylab("Percentage zeros") +
ggtitle("Mean-dropout relationship") + ggtitle("Mean-dropout relationship") +
theme_minimal() theme_minimal()
......
...@@ -9,6 +9,9 @@ ...@@ -9,6 +9,9 @@
#' @param min.size minimum size of clusters when identifying group of cells in #' @param min.size minimum size of clusters when identifying group of cells in
#' the data. #' the data.
#' @param verbose logical. Whether to show progress messages. #' @param verbose logical. Whether to show progress messages.
#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam}} instance
#' giving the parallel back-end to be used. Default is
#' \code{\link[BiocParallel]{SerialParam}} which uses a single core.
#' #'
#' @details #' @details
#' See \code{\link{Lun2Params}} for more details on the parameters. #' See \code{\link{Lun2Params}} for more details on the parameters.
...@@ -23,16 +26,19 @@ ...@@ -23,16 +26,19 @@
#' params <- lun2Estimate(sc_example_counts, plates, min.size = 20) #' params <- lun2Estimate(sc_example_counts, plates, min.size = 20)
#' params #' params
#' } #' }
#' @importFrom BiocParallel bplapply SerialParam
#' @export #' @export
lun2Estimate <- function(counts, plates, params = newLun2Params(), lun2Estimate <- function(counts, plates, params = newLun2Params(),
min.size = 200, verbose = TRUE) { min.size = 200, verbose = TRUE,
BPPARAM = SerialParam()) {
UseMethod("lun2Estimate") UseMethod("lun2Estimate")
} }
#' @rdname lun2Estimate #' @rdname lun2Estimate
#' @export #' @export
lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(), lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(),
min.size = 200, verbose = TRUE) { min.size = 200, verbose = TRUE,
BPPARAM = SerialParam()) {
counts <- scater::counts(counts) counts <- scater::counts(counts)
lun2Estimate(counts, plates, params, min.size = min.size, verbose = verbose) lun2Estimate(counts, plates, params, min.size = min.size, verbose = verbose)
} }
...@@ -42,7 +48,8 @@ lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(), ...@@ -42,7 +48,8 @@ lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(),
#' @importFrom locfit locfit #' @importFrom locfit locfit
#' @export #' @export
lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
min.size = 200, verbose = TRUE) { min.size = 200, verbose = TRUE,
BPPARAM = SerialParam()) {
# Check suggested packages # Check suggested packages
if (!requireNamespace("scran", quietly = TRUE)) { if (!requireNamespace("scran", quietly = TRUE)) {
...@@ -123,7 +130,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), ...@@ -123,7 +130,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
# As well as errors glmer produces warnings. Stop these showing because we # As well as errors glmer produces warnings. Stop these showing because we
# expect them. # expect them.
suppressWarnings( suppressWarnings(
collected <- lapply(seq_len(nrow(dge)), function(i) { collected <- bplapply(seq_len(nrow(dge)), function(i) {
if (progress) {pb$tick()} if (progress) {pb$tick()}
tryCatch({ tryCatch({
out <- lme4::glmer( out <- lme4::glmer(
...@@ -138,7 +145,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), ...@@ -138,7 +145,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
output <- NA_real_ output <- NA_real_
return(output) return(output)
}) })
})) }, BPPARAM = BPPARAM))
sigma2 <- mean(unlist(collected), na.rm = TRUE) sigma2 <- mean(unlist(collected), na.rm = TRUE)
# Repeating the estimation of the dispersion with ZINB models. # Repeating the estimation of the dispersion with ZINB models.
...@@ -156,16 +163,26 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), ...@@ -156,16 +163,26 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
message("This may take some time. Install 'progress' to see a ", message("This may take some time. Install 'progress' to see a ",
"progress bar.") "progress bar.")
} }
for (i in nonzeros) { zinb.ests <- bplapply(nonzeros, function(i) {
if (progress) {pb$tick()} if (progress) {pb$tick()}
zinb.est <- c(mean = zinb.mean[i], prop = zinb.prop[i],
disp = zinb.disp[i])
tryCatch({ tryCatch({
zfit <- pscl::zeroinfl(dge$count[i, ] ~ 0 + plates | 1, zfit <- pscl::zeroinfl(dge$count[i, ] ~ 0 + plates | 1,
dist = "negbin", offset = log(sum.facs)) dist = "negbin", offset = log(sum.facs))
zinb.mean[i] <- mean(exp(zfit$coefficients$count)) zinb.est <- c(mean = mean(exp(zfit$coefficients$count)),
zinb.prop[i] <- zfit$coefficients$zero prop = unname(zfit$coefficients$zero),
zinb.disp[i] <- 1 / zfit$theta disp = 1 / zfit$theta)
}, error = function(err) {}) }, error = function(err) {})
} return(zinb.est)
}, BPPARAM = BPPARAM)
zinb.ests <- do.call("rbind", zinb.ests)
zinb.prop[nonzeros] <- zinb.ests[, "prop"]
zinb.disp[nonzeros] <- zinb.ests[, "disp"]
zinb.mean[nonzeros] <- zinb.ests[, "mean"]
zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop)) zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop))
params <- setParams(params, nGenes = length(logmeans), params <- setParams(params, nGenes = length(logmeans),
...@@ -177,4 +194,4 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), ...@@ -177,4 +194,4 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
cell.libSizes = dge$samples$lib.size) cell.libSizes = dge$samples$lib.size)
return(params) return(params)
} }
\ No newline at end of file
...@@ -616,10 +616,8 @@ splatSimDropout <- function(sim, params) { ...@@ -616,10 +616,8 @@ splatSimDropout <- function(sim, params) {
cell.means <- get_exprs(sim, "CellMeans") cell.means <- get_exprs(sim, "CellMeans")
# Generate probabilites based on expression # Generate probabilites based on expression
lib.sizes <- colSums(true.counts)
cell.facs <- log(lib.sizes) / median(lib.sizes)
drop.prob <- sapply(seq_len(nCells), function(idx) { drop.prob <- sapply(seq_len(nCells), function(idx) {
eta <- cell.facs[idx] * (log(cell.means[, idx])) eta <- log(cell.means[, idx])
return(logistic(eta, x0 = dropout.mid, k = dropout.shape)) return(logistic(eta, x0 = dropout.mid, k = dropout.shape))
}) })
......
...@@ -7,13 +7,13 @@ ...@@ -7,13 +7,13 @@
\title{Estimate Lun2 simulation parameters} \title{Estimate Lun2 simulation parameters}
\usage{ \usage{
lun2Estimate(counts, plates, params = newLun2Params(), min.size = 200, lun2Estimate(counts, plates, params = newLun2Params(), min.size = 200,
verbose = TRUE) verbose = TRUE, BPPARAM = SerialParam())
\method{lun2Estimate}{SCESet}(counts, plates, params = newLun2Params(), \method{lun2Estimate}{SCESet}(counts, plates, params = newLun2Params(),
min.size = 200, verbose = TRUE) min.size = 200, verbose = TRUE, BPPARAM = SerialParam())
\method{lun2Estimate}{matrix}(counts, plates, params = newLun2Params(), \method{lun2Estimate}{matrix}(counts, plates, params = newLun2Params(),
min.size = 200, verbose = TRUE) min.size = 200, verbose = TRUE, BPPARAM = SerialParam())
} }
\arguments{ \arguments{
\item{counts}{either a counts matrix or an SCESet object containing count \item{counts}{either a counts matrix or an SCESet object containing count
...@@ -27,6 +27,10 @@ data to estimate parameters from.} ...@@ -27,6 +27,10 @@ data to estimate parameters from.}
the data.} the data.}
\item{verbose}{logical. Whether to show progress messages.} \item{verbose}{logical. Whether to show progress messages.}
\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam}} instance
giving the parallel back-end to be used. Default is
\code{\link[BiocParallel]{SerialParam}} which uses a single core.}
} }
\value{ \value{
LunParams object containing the estimated parameters. LunParams object containing the estimated parameters.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment