From 1166437b1d5bee3d6a08ea7f2d9f943d4056f024 Mon Sep 17 00:00:00 2001
From: Luke Zappia <luke.zappia@mcri.edu.au>
Date: Mon, 20 Mar 2017 09:17:10 +0000
Subject: [PATCH] Merge branch 'master' into devel

* master:
  Version 0.99.11
  Run checks
  Fix mean-zeros scale
  Make lun2Estimate parallel
  Allow non-interger Lun 2 lib sizes
  Fix dropout eta

From: Luke Zappia <lazappi@users.noreply.github.com>

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/splatter@127511 bc3139a8-67e5-0310-9ffc-ced21a209358
---
 DESCRIPTION            |  7 ++++---
 NAMESPACE              |  3 +++
 NEWS.md                |  7 +++++++
 R/Lun2Params-methods.R |  8 ++++----
 R/compare.R            | 14 ++++++++------
 R/lun2-estimate.R      | 39 ++++++++++++++++++++++++++++-----------
 R/splat-simulate.R     |  4 +---
 man/lun2Estimate.Rd    | 10 +++++++---
 8 files changed, 62 insertions(+), 30 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 3ef758f..e6aee3e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: splatter
 Type: Package
 Title: Simple Simulation of Single-cell RNA Sequencing Data
-Version: 0.99.10
-Date: 2017-03-07
+Version: 0.99.11
+Date: 2017-03-20
 Author: Luke Zappia
 Authors@R:
     c(person("Luke", "Zappia", role = c("aut", "cre"),
@@ -32,7 +32,8 @@ Imports:
     utils,
     matrixStats,
     ggplot2,
-    scales
+    scales,
+    BiocParallel
 Suggests:
     testthat,
     scran,
diff --git a/NAMESPACE b/NAMESPACE
index b6d494a..4e1ed1b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -45,6 +45,8 @@ importFrom(Biobase,"pData<-")
 importFrom(Biobase,assayData)
 importFrom(Biobase,fData)
 importFrom(Biobase,pData)
+importFrom(BiocParallel,SerialParam)
+importFrom(BiocParallel,bplapply)
 importFrom(checkmate,checkCharacter)
 importFrom(checkmate,checkClass)
 importFrom(checkmate,checkFactor)
@@ -61,6 +63,7 @@ importFrom(ggplot2,geom_smooth)
 importFrom(ggplot2,geom_violin)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,ggtitle)
+importFrom(ggplot2,scale_x_log10)
 importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,scale_y_log10)
 importFrom(ggplot2,theme_minimal)
diff --git a/NEWS.md b/NEWS.md
index 9480f91..4877c31 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,10 @@
+# 0.99.11
+
+* Add parallel option to lun2Estimate
+* Allow non-integer library sizes in Lun2Params
+* Adjust dropout eta value
+* Adjust comparison plots
+
 # 0.99.10
 
 * Improve Splat estimation procedure
diff --git a/R/Lun2Params-methods.R b/R/Lun2Params-methods.R
index eba7e09..b655884 100644
--- a/R/Lun2Params-methods.R
+++ b/R/Lun2Params-methods.R
@@ -9,8 +9,8 @@ newLun2Params <- function(...) {
     return(params)
 }
 
-#' @importFrom checkmate checkInt checkIntegerish checkNumber checkNumeric
-#' checkLogical checkCharacter checkFactor
+#' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical
+#' checkCharacter checkFactor
 setValidity("Lun2Params", function(object) {
 
     v <- getParams(object, slotNames(object))
@@ -36,8 +36,8 @@ setValidity("Lun2Params", function(object) {
                 gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0,
                                             len = nGenes),
                 cell.plates = checkFactor(v$cell.plates, len = nCells),
-                cell.libSizes = checkIntegerish(v$cell.libSizes, lower = 0,
-                                                len = nCells),
+                cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0,
+                                             len = nCells),
                 cell.libMod = checkNumber(v$cell.libMod, lower = 0),
                 de.nGene = checkInt(v$de.nGenes, lower = 0),
                 de.fc = checkNumber(v$de.fc, lower = 0))
diff --git a/R/compare.R b/R/compare.R
index 989417a..76de2ab 100644
--- a/R/compare.R
+++ b/R/compare.R
@@ -44,7 +44,8 @@
 #' names(comparison)
 #' names(comparison$Plots)
 #' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth geom_boxplot
-#' geom_violin scale_y_continuous scale_y_log10 xlab ylab ggtitle theme_minimal
+#' geom_violin scale_y_continuous scale_y_log10 scale_x_log10 xlab ylab ggtitle
+#' theme_minimal
 #' @importFrom scater cpm<-
 #' @export
 compareSCESets <- function(sces) {
@@ -98,7 +99,7 @@ compareSCESets <- function(sces) {
     mean.var <- ggplot(fData.all,
                        aes_string(x = "mean_log_cpm", y = "var_log_cpm",
                                   colour = "Dataset", fill = "Dataset")) +
-        geom_point(alpha = 0.2) +
+        geom_point(size = 0.1, alpha = 0.1) +
         geom_smooth() +
         xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) +
         ylab(expression(paste("Variance ", log[2], "(CPM + 1)"))) +
@@ -133,12 +134,13 @@ compareSCESets <- function(sces) {
         theme_minimal()
 
     mean.zeros <- ggplot(fData.all,
-                         aes_string(x = "mean_log_cpm", y = "pct_dropout",
+                         aes_string(x = "mean_counts", y = "pct_dropout",
                                     colour = "Dataset", fill = "Dataset")) +
-        geom_point(alpha = 0.2) +
+        geom_point(size = 0.1, alpha = 0.1) +
         geom_smooth() +
-        xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) +
-        ylab(expression(paste("Percentage zeros"))) +
+        scale_x_log10(labels = scales::comma) +
+        xlab("Mean count") +
+        ylab("Percentage zeros") +
         ggtitle("Mean-dropout relationship") +
         theme_minimal()
 
diff --git a/R/lun2-estimate.R b/R/lun2-estimate.R
index 3c1bf70..cde774b 100644
--- a/R/lun2-estimate.R
+++ b/R/lun2-estimate.R
@@ -9,6 +9,9 @@
 #' @param min.size minimum size of clusters when identifying group of cells in
 #'        the data.
 #' @param verbose logical. Whether to show progress messages.
+#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam}} instance
+#'        giving the parallel back-end to be used. Default is
+#'        \code{\link[BiocParallel]{SerialParam}} which uses a single core.
 #'
 #' @details
 #' See \code{\link{Lun2Params}} for more details on the parameters.
@@ -23,16 +26,19 @@
 #' params <- lun2Estimate(sc_example_counts, plates, min.size = 20)
 #' params
 #' }
+#' @importFrom BiocParallel bplapply SerialParam
 #' @export
 lun2Estimate <- function(counts, plates, params = newLun2Params(),
-                         min.size = 200, verbose = TRUE) {
+                         min.size = 200, verbose = TRUE,
+                         BPPARAM = SerialParam()) {
     UseMethod("lun2Estimate")
 }
 
 #' @rdname lun2Estimate
 #' @export
 lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(),
-                                min.size = 200, verbose = TRUE) {
+                                min.size = 200, verbose = TRUE,
+                                BPPARAM = SerialParam()) {
     counts <- scater::counts(counts)
     lun2Estimate(counts, plates, params, min.size = min.size, verbose = verbose)
 }
@@ -42,7 +48,8 @@ lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(),
 #' @importFrom locfit locfit
 #' @export
 lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
-                                min.size = 200, verbose = TRUE) {
+                                min.size = 200, verbose = TRUE,
+                                BPPARAM = SerialParam()) {
 
     # Check suggested packages
     if (!requireNamespace("scran", quietly = TRUE)) {
@@ -123,7 +130,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
     # As well as errors glmer produces warnings. Stop these showing because we
     # expect them.
     suppressWarnings(
-    collected <- lapply(seq_len(nrow(dge)), function(i) {
+    collected <- bplapply(seq_len(nrow(dge)), function(i) {
         if (progress) {pb$tick()}
         tryCatch({
             out <- lme4::glmer(
@@ -138,7 +145,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
             output <- NA_real_
             return(output)
         })
-    }))
+    }, BPPARAM = BPPARAM))
     sigma2 <- mean(unlist(collected), na.rm = TRUE)
 
     # Repeating the estimation of the dispersion with ZINB models.
@@ -156,16 +163,26 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
         message("This may take some time. Install 'progress' to see a ",
                 "progress bar.")
     }
-    for (i in nonzeros) {
+    zinb.ests <- bplapply(nonzeros, function(i) {
         if (progress) {pb$tick()}
+        zinb.est <- c(mean = zinb.mean[i], prop = zinb.prop[i],
+                      disp = zinb.disp[i])
         tryCatch({
             zfit <- pscl::zeroinfl(dge$count[i, ] ~ 0 + plates | 1,
                                    dist = "negbin", offset = log(sum.facs))
-            zinb.mean[i] <- mean(exp(zfit$coefficients$count))
-            zinb.prop[i] <- zfit$coefficients$zero
-            zinb.disp[i] <- 1 / zfit$theta
+            zinb.est <- c(mean = mean(exp(zfit$coefficients$count)),
+                          prop = unname(zfit$coefficients$zero),
+                          disp = 1 / zfit$theta)
         }, error = function(err) {})
-    }
+        return(zinb.est)
+    }, BPPARAM = BPPARAM)
+
+    zinb.ests <- do.call("rbind", zinb.ests)
+
+    zinb.prop[nonzeros] <- zinb.ests[, "prop"]
+    zinb.disp[nonzeros] <- zinb.ests[, "disp"]
+    zinb.mean[nonzeros] <- zinb.ests[, "mean"]
+
     zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop))
 
     params <- setParams(params, nGenes = length(logmeans),
@@ -177,4 +194,4 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(),
                         cell.libSizes = dge$samples$lib.size)
 
     return(params)
-}
\ No newline at end of file
+}
diff --git a/R/splat-simulate.R b/R/splat-simulate.R
index bb54cb6..70a987d 100644
--- a/R/splat-simulate.R
+++ b/R/splat-simulate.R
@@ -616,10 +616,8 @@ splatSimDropout <- function(sim, params) {
         cell.means <- get_exprs(sim, "CellMeans")
 
         # Generate probabilites based on expression
-        lib.sizes <- colSums(true.counts)
-        cell.facs <- log(lib.sizes) / median(lib.sizes)
         drop.prob <- sapply(seq_len(nCells), function(idx) {
-            eta <- cell.facs[idx] * (log(cell.means[, idx]))
+            eta <- log(cell.means[, idx])
             return(logistic(eta, x0 = dropout.mid, k = dropout.shape))
         })
 
diff --git a/man/lun2Estimate.Rd b/man/lun2Estimate.Rd
index f63273d..3696347 100644
--- a/man/lun2Estimate.Rd
+++ b/man/lun2Estimate.Rd
@@ -7,13 +7,13 @@
 \title{Estimate Lun2 simulation parameters}
 \usage{
 lun2Estimate(counts, plates, params = newLun2Params(), min.size = 200,
-  verbose = TRUE)
+  verbose = TRUE, BPPARAM = SerialParam())
 
 \method{lun2Estimate}{SCESet}(counts, plates, params = newLun2Params(),
-  min.size = 200, verbose = TRUE)
+  min.size = 200, verbose = TRUE, BPPARAM = SerialParam())
 
 \method{lun2Estimate}{matrix}(counts, plates, params = newLun2Params(),
-  min.size = 200, verbose = TRUE)
+  min.size = 200, verbose = TRUE, BPPARAM = SerialParam())
 }
 \arguments{
 \item{counts}{either a counts matrix or an SCESet object containing count
@@ -27,6 +27,10 @@ data to estimate parameters from.}
 the data.}
 
 \item{verbose}{logical. Whether to show progress messages.}
+
+\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam}} instance
+giving the parallel back-end to be used. Default is
+\code{\link[BiocParallel]{SerialParam}} which uses a single core.}
 }
 \value{
 LunParams object containing the estimated parameters.
-- 
GitLab