Merge branch 'lun2-update'

* lun2-update: Run checks Fix mean-zeros scale Make lun2Estimate parallel Allow non-interger Lun 2 lib sizes Fix dropout eta

Merge branch 'lun2-update'
f432d552 · Luke Zappia · 0902171d · 73bf11ba · f432d552 · f432d552
Commit f432d552 authored 7 years ago by Luke Zappia
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,7 +32,8 @@ Imports:
    utils,
    matrixStats,
    ggplot2,
-    scales
+    scales,
+    BiocParallel
 Suggests:
    testthat,
    scran,

--- a/NAMESPACE
+++ b/NAMESPACE
@@ -45,6 +45,8 @@ importFrom(Biobase,"pData<-")
 importFrom(Biobase,assayData)
 importFrom(Biobase,fData)
 importFrom(Biobase,pData)
+importFrom(BiocParallel,SerialParam)
+importFrom(BiocParallel,bplapply)
 importFrom(checkmate,checkCharacter)
 importFrom(checkmate,checkClass)
 importFrom(checkmate,checkFactor)
@@ -61,6 +63,7 @@ importFrom(ggplot2,geom_smooth)
 importFrom(ggplot2,geom_violin)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,ggtitle)
+importFrom(ggplot2,scale_x_log10)
 importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,scale_y_log10)
 importFrom(ggplot2,theme_minimal)

--- a/R/Lun2Params-methods.R
+++ b/R/Lun2Params-methods.R
@@ -9,8 +9,8 @@ newLun2Params <- function(...) {
    return(params)
 }

-#' @importFrom checkmate checkInt checkIntegerish checkNumber checkNumeric
-#' checkLogical checkCharacter checkFactor
+#' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical
+#' checkCharacter checkFactor
 setValidity("Lun2Params", function(object) {

    v <- getParams(object, slotNames(object))
@@ -36,8 +36,8 @@ setValidity("Lun2Params", function(object) {
                gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0,
                                            len = nGenes),
                cell.plates = checkFactor(v$cell.plates, len = nCells),
-                cell.libSizes = checkIntegerish(v$cell.libSizes, lower = 0,
-                                                len = nCells),
+                cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0,
+                                             len = nCells),
                cell.libMod = checkNumber(v$cell.libMod, lower = 0),
                de.nGene = checkInt(v$de.nGenes, lower = 0),
                de.fc = checkNumber(v$de.fc, lower = 0))

--- a/R/compare.R
+++ b/R/compare.R
@@ -44,7 +44,8 @@
 #' names(comparison)
 #' names(comparison$Plots)
 #' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth geom_boxplot
-#' geom_violin scale_y_continuous scale_y_log10 xlab ylab ggtitle theme_minimal
+#' geom_violin scale_y_continuous scale_y_log10 scale_x_log10 xlab ylab ggtitle
+#' theme_minimal
 #' @importFrom scater cpm<-
 #' @export
 compareSCESets <- function(sces) {
@@ -98,7 +99,7 @@ compareSCESets <- function(sces) {
    mean.var <- ggplot(fData.all,
                       aes_string(x = "mean_log_cpm", y = "var_log_cpm",
                                  colour = "Dataset", fill = "Dataset")) +
-        geom_point(alpha = 0.2) +
+        geom_point(size = 0.1, alpha = 0.1) +
        geom_smooth() +
        xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) +
        ylab(expression(paste("Variance ", log[2], "(CPM + 1)"))) +
@@ -133,12 +134,13 @@ compareSCESets <- function(sces) {
        theme_minimal()

    mean.zeros <- ggplot(fData.all,
-                         aes_string(x = "mean_log_cpm", y = "pct_dropout",
+                         aes_string(x = "mean_counts", y = "pct_dropout",
                                    colour = "Dataset", fill = "Dataset")) +
-        geom_point(alpha = 0.2) +
+        geom_point(size = 0.1, alpha = 0.1) +
        geom_smooth() +
-        xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) +
-        ylab(expression(paste("Percentage zeros"))) +
+        scale_x_log10(labels = scales::comma) +
+        xlab("Mean count") +
+        ylab("Percentage zeros") +
        ggtitle("Mean-dropout relationship") +
        theme_minimal()


--- a/R/lun2-estimate.R
+++ b/R/lun2-estimate.R
@@ -9,6 +9,9 @@
 #' @param min.size minimum size of clusters when identifying group of cells in
 #'        the data.
 #' @param verbose logical. Whether to show progress messages.
+#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam}} instance
+#'        giving the parallel back-end to be used. Default is
+#'        \code{\link[BiocParallel]{SerialParam}} which uses a single core.
 #'
 #' @details
 #' See \code{\link{Lun2Params}} for more details on the parameters.
@@ -23,16 +26,19 @@
 #' params <- lun2Estimate(sc_example_counts, plates, min.size = 20)
 #' params
 #' }
+#' @importFrom BiocParallel bplapply SerialParam
 #' @export
 lun2Estimate <- function(counts, plates, params = newLun2Params(),
-                         min.size = 200, verbose = TRUE) {
+                         min.size = 200, verbose = TRUE,
+                         BPPARAM = SerialParam()) {
    UseMethod("lun2Estimate")
 }

 #' @rdname lun2Estimate
 #' @export
 lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(),
-                                min.size = 200, verbose = TRUE) {
+                                min.size = 200, verbose = TRUE,
+                                BPPARAM = SerialParam()) {
    counts <- scater::counts(counts)
    lun2Estimate(counts, plates, params, min.size = min.size, verbose = verbose)
 }
@@ -42,7 +48,8 @@ lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(),
 #' @importFrom locfit locfit
 #' @export
 lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
-                                min.size = 200, verbose = TRUE) {
+                                min.size = 200, verbose = TRUE,
+                                BPPARAM = SerialParam()) {

    # Check suggested packages
    if (!requireNamespace("scran", quietly = TRUE)) {
@@ -123,7 +130,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
    # As well as errors glmer produces warnings. Stop these showing because we
    # expect them.
    suppressWarnings(
-    collected <- lapply(seq_len(nrow(dge)), function(i) {
+    collected <- bplapply(seq_len(nrow(dge)), function(i) {
        if (progress) {pb$tick()}
        tryCatch({
            out <- lme4::glmer(
@@ -138,7 +145,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
            output <- NA_real_
            return(output)
        })
-    }))
+    }, BPPARAM = BPPARAM))
    sigma2 <- mean(unlist(collected), na.rm = TRUE)

    # Repeating the estimation of the dispersion with ZINB models.
@@ -156,16 +163,26 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
        message("This may take some time. Install 'progress' to see a ",
                "progress bar.")
    }
-    for (i in nonzeros) {
+    zinb.ests <- bplapply(nonzeros, function(i) {
        if (progress) {pb$tick()}
+        zinb.est <- c(mean = zinb.mean[i], prop = zinb.prop[i],
+                      disp = zinb.disp[i])
        tryCatch({
            zfit <- pscl::zeroinfl(dge$count[i, ] ~ 0 + plates | 1,
                                   dist = "negbin", offset = log(sum.facs))
-            zinb.mean[i] <- mean(exp(zfit$coefficients$count))
-            zinb.prop[i] <- zfit$coefficients$zero
-            zinb.disp[i] <- 1 / zfit$theta
+            zinb.est <- c(mean = mean(exp(zfit$coefficients$count)),
+                          prop = unname(zfit$coefficients$zero),
+                          disp = 1 / zfit$theta)
        }, error = function(err) {})
-    }
+        return(zinb.est)
+    }, BPPARAM = BPPARAM)
+
+    zinb.ests <- do.call("rbind", zinb.ests)
+
+    zinb.prop[nonzeros] <- zinb.ests[, "prop"]
+    zinb.disp[nonzeros] <- zinb.ests[, "disp"]
+    zinb.mean[nonzeros] <- zinb.ests[, "mean"]
+
    zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop))

    params <- setParams(params, nGenes = length(logmeans),
@@ -177,4 +194,4 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
                        cell.libSizes = dge$samples$lib.size)

    return(params)
-}
\ No newline at end of file
+}
--- a/R/splat-simulate.R
+++ b/R/splat-simulate.R
@@ -616,10 +616,8 @@ splatSimDropout <- function(sim, params) {
        cell.means <- get_exprs(sim, "CellMeans")

        # Generate probabilites based on expression
-        lib.sizes <- colSums(true.counts)
-        cell.facs <- log(lib.sizes) / median(lib.sizes)
        drop.prob <- sapply(seq_len(nCells), function(idx) {
-            eta <- cell.facs[idx] * (log(cell.means[, idx]))
+            eta <- log(cell.means[, idx])
            return(logistic(eta, x0 = dropout.mid, k = dropout.shape))
        })


--- a/man/lun2Estimate.Rd
+++ b/man/lun2Estimate.Rd
@@ -7,13 +7,13 @@
 \title{Estimate Lun2 simulation parameters}
 \usage{
 lun2Estimate(counts, plates, params = newLun2Params(), min.size = 200,
-  verbose = TRUE)
+  verbose = TRUE, BPPARAM = SerialParam())

 \method{lun2Estimate}{SCESet}(counts, plates, params = newLun2Params(),
-  min.size = 200, verbose = TRUE)
+  min.size = 200, verbose = TRUE, BPPARAM = SerialParam())

 \method{lun2Estimate}{matrix}(counts, plates, params = newLun2Params(),
-  min.size = 200, verbose = TRUE)
+  min.size = 200, verbose = TRUE, BPPARAM = SerialParam())
 }
 \arguments{
 \item{counts}{either a counts matrix or an SCESet object containing count
@@ -27,6 +27,10 @@ data to estimate parameters from.}
 the data.}

 \item{verbose}{logical. Whether to show progress messages.}
+
+\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam}} instance
+giving the parallel back-end to be used. Default is
+\code{\link[BiocParallel]{SerialParam}} which uses a single core.}
 }
 \value{
 LunParams object containing the estimated parameters.