diff --git a/DESCRIPTION b/DESCRIPTION index 3ef758f6696565391eb13544142c7a0b347c100f..772c35628c0d7d6ab7b3baa3f7dced53fa0c4b1b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Imports: utils, matrixStats, ggplot2, - scales + scales, + BiocParallel Suggests: testthat, scran, diff --git a/NAMESPACE b/NAMESPACE index b6d494ae6c6250912c1ccab26227328dc44d9374..4e1ed1b1e5156d9606895f2d4f1f2e5951f0cbc1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,8 @@ importFrom(Biobase,"pData<-") importFrom(Biobase,assayData) importFrom(Biobase,fData) importFrom(Biobase,pData) +importFrom(BiocParallel,SerialParam) +importFrom(BiocParallel,bplapply) importFrom(checkmate,checkCharacter) importFrom(checkmate,checkClass) importFrom(checkmate,checkFactor) @@ -61,6 +63,7 @@ importFrom(ggplot2,geom_smooth) importFrom(ggplot2,geom_violin) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggtitle) +importFrom(ggplot2,scale_x_log10) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,scale_y_log10) importFrom(ggplot2,theme_minimal) diff --git a/R/Lun2Params-methods.R b/R/Lun2Params-methods.R index eba7e0955eca212be8df4958a2ec2d2acd71a45d..b655884d125b6a7ef1939ceaea5a6274d68a02e1 100644 --- a/R/Lun2Params-methods.R +++ b/R/Lun2Params-methods.R @@ -9,8 +9,8 @@ newLun2Params <- function(...) { return(params) } -#' @importFrom checkmate checkInt checkIntegerish checkNumber checkNumeric -#' checkLogical checkCharacter checkFactor +#' @importFrom checkmate checkInt checkNumber checkNumeric checkLogical +#' checkCharacter checkFactor setValidity("Lun2Params", function(object) { v <- getParams(object, slotNames(object)) @@ -36,8 +36,8 @@ setValidity("Lun2Params", function(object) { gene.ziProps = checkNumeric(v$gene.ziProps, lower = 0, len = nGenes), cell.plates = checkFactor(v$cell.plates, len = nCells), - cell.libSizes = checkIntegerish(v$cell.libSizes, lower = 0, - len = nCells), + cell.libSizes = checkNumeric(v$cell.libSizes, lower = 0, + len = nCells), cell.libMod = checkNumber(v$cell.libMod, lower = 0), de.nGene = checkInt(v$de.nGenes, lower = 0), de.fc = checkNumber(v$de.fc, lower = 0)) diff --git a/R/compare.R b/R/compare.R index 989417a0b263d5d3f145ab995e32f25b0f7c6cac..76de2ab8eaf18d0a0ebadd2aa3435c64fc40594e 100644 --- a/R/compare.R +++ b/R/compare.R @@ -44,7 +44,8 @@ #' names(comparison) #' names(comparison$Plots) #' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth geom_boxplot -#' geom_violin scale_y_continuous scale_y_log10 xlab ylab ggtitle theme_minimal +#' geom_violin scale_y_continuous scale_y_log10 scale_x_log10 xlab ylab ggtitle +#' theme_minimal #' @importFrom scater cpm<- #' @export compareSCESets <- function(sces) { @@ -98,7 +99,7 @@ compareSCESets <- function(sces) { mean.var <- ggplot(fData.all, aes_string(x = "mean_log_cpm", y = "var_log_cpm", colour = "Dataset", fill = "Dataset")) + - geom_point(alpha = 0.2) + + geom_point(size = 0.1, alpha = 0.1) + geom_smooth() + xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) + ylab(expression(paste("Variance ", log[2], "(CPM + 1)"))) + @@ -133,12 +134,13 @@ compareSCESets <- function(sces) { theme_minimal() mean.zeros <- ggplot(fData.all, - aes_string(x = "mean_log_cpm", y = "pct_dropout", + aes_string(x = "mean_counts", y = "pct_dropout", colour = "Dataset", fill = "Dataset")) + - geom_point(alpha = 0.2) + + geom_point(size = 0.1, alpha = 0.1) + geom_smooth() + - xlab(expression(paste("Mean ", log[2], "(CPM + 1)"))) + - ylab(expression(paste("Percentage zeros"))) + + scale_x_log10(labels = scales::comma) + + xlab("Mean count") + + ylab("Percentage zeros") + ggtitle("Mean-dropout relationship") + theme_minimal() diff --git a/R/lun2-estimate.R b/R/lun2-estimate.R index 3c1bf701e03807831f69c5260103e8b59c59eb18..cde774baf9b402bb2baa54e07a38caaa35718e82 100644 --- a/R/lun2-estimate.R +++ b/R/lun2-estimate.R @@ -9,6 +9,9 @@ #' @param min.size minimum size of clusters when identifying group of cells in #' the data. #' @param verbose logical. Whether to show progress messages. +#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam}} instance +#' giving the parallel back-end to be used. Default is +#' \code{\link[BiocParallel]{SerialParam}} which uses a single core. #' #' @details #' See \code{\link{Lun2Params}} for more details on the parameters. @@ -23,16 +26,19 @@ #' params <- lun2Estimate(sc_example_counts, plates, min.size = 20) #' params #' } +#' @importFrom BiocParallel bplapply SerialParam #' @export lun2Estimate <- function(counts, plates, params = newLun2Params(), - min.size = 200, verbose = TRUE) { + min.size = 200, verbose = TRUE, + BPPARAM = SerialParam()) { UseMethod("lun2Estimate") } #' @rdname lun2Estimate #' @export lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(), - min.size = 200, verbose = TRUE) { + min.size = 200, verbose = TRUE, + BPPARAM = SerialParam()) { counts <- scater::counts(counts) lun2Estimate(counts, plates, params, min.size = min.size, verbose = verbose) } @@ -42,7 +48,8 @@ lun2Estimate.SCESet <- function(counts, plates, params = newLun2Params(), #' @importFrom locfit locfit #' @export lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), - min.size = 200, verbose = TRUE) { + min.size = 200, verbose = TRUE, + BPPARAM = SerialParam()) { # Check suggested packages if (!requireNamespace("scran", quietly = TRUE)) { @@ -123,7 +130,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), # As well as errors glmer produces warnings. Stop these showing because we # expect them. suppressWarnings( - collected <- lapply(seq_len(nrow(dge)), function(i) { + collected <- bplapply(seq_len(nrow(dge)), function(i) { if (progress) {pb$tick()} tryCatch({ out <- lme4::glmer( @@ -138,7 +145,7 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), output <- NA_real_ return(output) }) - })) + }, BPPARAM = BPPARAM)) sigma2 <- mean(unlist(collected), na.rm = TRUE) # Repeating the estimation of the dispersion with ZINB models. @@ -156,16 +163,26 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), message("This may take some time. Install 'progress' to see a ", "progress bar.") } - for (i in nonzeros) { + zinb.ests <- bplapply(nonzeros, function(i) { if (progress) {pb$tick()} + zinb.est <- c(mean = zinb.mean[i], prop = zinb.prop[i], + disp = zinb.disp[i]) tryCatch({ zfit <- pscl::zeroinfl(dge$count[i, ] ~ 0 + plates | 1, dist = "negbin", offset = log(sum.facs)) - zinb.mean[i] <- mean(exp(zfit$coefficients$count)) - zinb.prop[i] <- zfit$coefficients$zero - zinb.disp[i] <- 1 / zfit$theta + zinb.est <- c(mean = mean(exp(zfit$coefficients$count)), + prop = unname(zfit$coefficients$zero), + disp = 1 / zfit$theta) }, error = function(err) {}) - } + return(zinb.est) + }, BPPARAM = BPPARAM) + + zinb.ests <- do.call("rbind", zinb.ests) + + zinb.prop[nonzeros] <- zinb.ests[, "prop"] + zinb.disp[nonzeros] <- zinb.ests[, "disp"] + zinb.mean[nonzeros] <- zinb.ests[, "mean"] + zinb.prop <- exp(zinb.prop) / (1 + exp(zinb.prop)) params <- setParams(params, nGenes = length(logmeans), @@ -177,4 +194,4 @@ lun2Estimate.matrix <- function(counts, plates, params = newLun2Params(), cell.libSizes = dge$samples$lib.size) return(params) -} \ No newline at end of file +} diff --git a/R/splat-simulate.R b/R/splat-simulate.R index bb54cb6f870cfa097a5761a04ad382d681c960a4..70a987d72786c51cfc31f321805de3da84bb5e76 100644 --- a/R/splat-simulate.R +++ b/R/splat-simulate.R @@ -616,10 +616,8 @@ splatSimDropout <- function(sim, params) { cell.means <- get_exprs(sim, "CellMeans") # Generate probabilites based on expression - lib.sizes <- colSums(true.counts) - cell.facs <- log(lib.sizes) / median(lib.sizes) drop.prob <- sapply(seq_len(nCells), function(idx) { - eta <- cell.facs[idx] * (log(cell.means[, idx])) + eta <- log(cell.means[, idx]) return(logistic(eta, x0 = dropout.mid, k = dropout.shape)) }) diff --git a/man/lun2Estimate.Rd b/man/lun2Estimate.Rd index f63273db64cf9e71fca304f6276b1530724caed8..369634764aa1b172861920701a4538e85065448e 100644 --- a/man/lun2Estimate.Rd +++ b/man/lun2Estimate.Rd @@ -7,13 +7,13 @@ \title{Estimate Lun2 simulation parameters} \usage{ lun2Estimate(counts, plates, params = newLun2Params(), min.size = 200, - verbose = TRUE) + verbose = TRUE, BPPARAM = SerialParam()) \method{lun2Estimate}{SCESet}(counts, plates, params = newLun2Params(), - min.size = 200, verbose = TRUE) + min.size = 200, verbose = TRUE, BPPARAM = SerialParam()) \method{lun2Estimate}{matrix}(counts, plates, params = newLun2Params(), - min.size = 200, verbose = TRUE) + min.size = 200, verbose = TRUE, BPPARAM = SerialParam()) } \arguments{ \item{counts}{either a counts matrix or an SCESet object containing count @@ -27,6 +27,10 @@ data to estimate parameters from.} the data.} \item{verbose}{logical. Whether to show progress messages.} + +\item{BPPARAM}{A \code{\link[BiocParallel]{BiocParallelParam}} instance +giving the parallel back-end to be used. Default is +\code{\link[BiocParallel]{SerialParam}} which uses a single core.} } \value{ LunParams object containing the estimated parameters.