From 2151840fb0f3ed6ea328456df2a65214f02550e4 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Wed, 17 Jul 2019 17:07:30 +1000 Subject: [PATCH] Add library size params and simulate cell means --- DESCRIPTION | 4 +-- NAMESPACE | 1 + NEWS.md | 6 ++++ R/AllClasses.R | 31 ++++++++++++++++++-- R/SplotchParams-methods.R | 50 ++++++++++++++++++++++++++++++-- R/splotch-simulate.R | 61 +++++++++++++++++++++++++++++++++++++-- man/SplotchParams.Rd | 16 ++++++++++ 7 files changed, 160 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f3581ec..554bc87 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.2.9002 -Date: 2019-07-16 +Version: 1.9.2.9003 +Date: 2019-07-17 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index afefae4..769ee81 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -134,6 +134,7 @@ importFrom(methods,slotNames) importFrom(methods,validObject) importFrom(stats,aggregate) importFrom(stats,cor) +importFrom(stats,dbeta) importFrom(stats,dnbinom) importFrom(stats,ks.test) importFrom(stats,median) diff --git a/NEWS.md b/NEWS.md index db32f1f..f3095ec 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +### Version 1.9.2.9003 (2019-07-17) + +* Topologically sort paths +* Add library size parameters +* Simulate cell means + ### Version 1.9.2.9002 (2019-07-16) * Add paths parameters diff --git a/R/AllClasses.R b/R/AllClasses.R index 35fc2af..e497256 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -290,6 +290,22 @@ setClass("SplatParams", #' structure.} #' } #' } +#' \item{\emph{Library size parameters}}{ +#' \describe{ +#' \item{\code{lib.loc}}{Location (meanlog) parameter for the +#' library size log-normal distribution, or mean parameter if a +#' normal distribution is used.} +#' \item{\code{lib.scale}}{Scale (sdlog) parameter for the library +#' size log-normal distribution, or sd parameter if a normal +#' distribution is used.} +#' } +#' } +#' \item{\emph{Paths parameters}}{ +#' \describe{ +#' \item{\code{[cells.design]}}{data.frame describing cell +#' structure.} +#' } +#' } #' } #' #' The parameters not shown in brackets can be estimated from real data using @@ -310,7 +326,10 @@ setClass("SplotchParams", network.regsSet = "logical", paths.nPrograms = "numeric", paths.design = "data.frame", - paths.means = "list"), + paths.means = "list", + lib.loc = "numeric", + lib.scale = "numeric", + cells.design = "data.frame"), prototype = prototype(mean.rate = 0.3, mean.shape = 0.6, mean.values = numeric(), @@ -323,7 +342,15 @@ setClass("SplotchParams", From = 0, Steps = 100 ), - paths.means = list())) + paths.means = list(), + lib.loc = 11, + lib.scale = 0.2, + cells.design = data.frame( + Path = 1, + Probability = 1, + Alpha = 0, + Beta = 1 + ))) #' The LunParams class #' diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index 53ab297..263e384 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -33,7 +33,15 @@ setValidity("SplotchParams", function(object) { types = "numeric", any.missing = FALSE, min.rows = 1, - ncols = 3)) + ncols = 3), + lib.loc = checkNumber(v$lib.loc), + lib.scale = checkNumber(v$lib.scale, lower = 0), + cells.design = checkmate::checkDataFrame(v$cells.design, + types = "numeric", + any.missing = FALSE, + nrows = nrow( + v$paths.design), + ncols = 4)) if (!checkmate::testNumeric(v$mean.values, len = 0)) { checks <- c(checks, @@ -73,6 +81,27 @@ setValidity("SplotchParams", function(object) { names = "unique")) } + if (!all(colnames(v$cells.design) == c("Path", "Probability", "Alpha", + "Beta"))) { + checks <- c(checks, cells.design = "Incorrect column names") + } else { + if (!all(sort(v$cells.design$Path) == sort(v$paths.design$Path))) { + checks <- c(checks, + cells.design = "Path names don't match paths.design") + } + if (sum(v$cells.design$Probability) != 1) { + checks <- c(checks, cells.design = "Probability must sum to 1") + } + checks <- c(checks, + "cells.design$Alpha" = checkmate::checkNumeric( + v$cells.design$Alpha, + lower = 0, any.missing = FALSE)) + checks <- c(checks, + "cells.design$Beta" = checkmate::checkNumeric( + v$cells.design$Beta, + lower = 0, any.missing = FALSE)) + } + if (all(checks == TRUE)) { valid <- TRUE } else { @@ -97,6 +126,10 @@ setMethod("show", "SplotchParams", function(object) { pp.paths <- list("Paths:" = c("[nPrograms]" = "paths.nPrograms", "[Design]" = "paths.design")) + pp.bot <- list("Library size:" = c("(Location)" = "lib.loc", + "(Scale)" = "lib.scale"), + "Cells:" = c("[Design]" = "cells.design")) + paths.means <- getParam(object, "paths.means") if (length(paths.means) == 0) { pp.paths[[1]] <- c(pp.paths[[1]], "[Means]*" = "paths.means") @@ -130,9 +163,11 @@ setMethod("show", "SplotchParams", function(object) { if (length(paths.means) != 0) { cat(crayon::bgYellow(crayon::bold(crayon::blue("[MEANS]\n")))) cat(crayon::bold(crayon::green(paste( - "List of", length(paths.means), "matrices" + "List of", length(paths.means), "matrices\n\n" )))) } + + showPP(object, pp.bot) }) #' @rdname setParam @@ -185,6 +220,15 @@ setMethod("setParam", "SplotchParams", function(object, name, value) { stop("network.regsSet can not be set manually") } + if (name == "paths.design" && + (nrow(value) != nrow(getParam(object, "paths.design")))) { + warning("cells.design reset to match paths.design") + cells.design <- data.frame(Path = value$Path, + Probability = 1 / nrow(value), + Alpha = 0, Beta = 1) + object <- setParamUnchecked(object, "cells.design", cells.design) + } + object <- callNextMethod() return(object) @@ -197,7 +241,7 @@ setMethod("setParams", "SplotchParams", function(object, update = NULL, ...) { update <- c(update, list(...)) - update <- bringItemsForward(update, c("network.graph")) + update <- bringItemsForward(update, c("network.graph", "paths.design")) object <- callNextMethod(object, update) diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index e91e4b7..9bf54b9 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -16,6 +16,7 @@ #' #' @export #' @importFrom SingleCellExperiment SingleCellExperiment +#' @importFrom stats dbeta splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { checkmate::assertClass(params, "SplotchParams") @@ -57,6 +58,7 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { } # Generate paths + if (verbose) {message("Simulating paths...")} paths.design <- getParam(params, "paths.design") network.graph <- getParam(params, "network.graph") network.weights <- igraph::as_adjacency_matrix(network.graph, @@ -68,7 +70,15 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { nrow = network.nRegs, ncol = paths.nPrograms) paths.changes <- vector("list", nrow(paths.design)) paths.factors <- vector("list", nrow(paths.design)) - for (path in seq_len(nrow(paths.design))) { + + paths.graph <- igraph::graph_from_data_frame(paths.design) + paths.order <- names(igraph::topo_sort(paths.graph, mode = "in")) + paths.order <- as.numeric(paths.order) + # The origin is not a path + paths.order <- paths.order[paths.order != 0] + + for (path in paths.order) { + if (verbose) {message("Simulating path ", path, "...")} nSteps <- paths.design$Steps[path] from <- paths.design$From[path] changes <- matrix(0, nrow = nGenes, ncol = nSteps + 1) @@ -104,7 +114,54 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { names(paths.means) <- paste0("Path", paths.design$Path) params <- setParam(params, "paths.means", paths.means) - # Simulate base gene means + if (verbose) {message("Simulating library sizes...")} + nCells <- getParam(params, "nCells") + lib.loc <- getParam(params, "lib.loc") + lib.scale <- getParam(params, "lib.scale") + lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) + + if (verbose) {message("Simulating cell means...")} + nCells <- getParam(params, "nCells") + cells.design <- getParam(params, "cells.design") + cells.paths <- sample(cells.design$Path, nCells, replace = TRUE, + prob = cells.design$Probability) + paths.design <- getParam(params, "paths.design") + paths.cells.design <- merge(paths.design, cells.design) + steps.probs <- apply(paths.cells.design, 1, function(path) { + steps <- path["Steps"] + dens <- dbeta(seq(0, 1, length.out = steps), + path["Alpha"], path["Beta"]) + # Adjust for infinite values at edge of distribution + dens.inf <- !is.finite(dens) + if (any(dens.inf) && all(dens[!dens.inf] == 0)) { + dens[dens.inf] <- 1 + } + if (!is.finite(dens[1])) { + dens[1] <- 1.1 * dens[2] + } + if (!is.finite(dens[steps])) { + dens[steps] <- 1.1 * dens[steps - 1] + } + + probs <- dens / sum(dens) + + # Return a list to avoid getting a matrix if all path lengths are equal + return(list(probs)) + }) + # Remove unnecessary list level + steps.probs <- lapply(steps.probs, "[[", 1) + names(steps.probs) <- paths.cells.design$Path + cells.steps <- sapply(cells.paths, function(path) { + probs <- steps.probs[[path]] + step <- sample(1:length(probs), 1, prob = probs) + return(step) + }) + cells.means <- sapply(seq_len(nCells), function(cell) { + path <- cells.paths[cell] + step <- cells.steps[cell] + means <- paths.means[[path]][, step] + return(means) + }) # sim <- SingleCellExperiment(assays = list(counts = counts), # rowData = features, diff --git a/man/SplotchParams.Rd b/man/SplotchParams.Rd index 20c32bd..3c437d2 100644 --- a/man/SplotchParams.Rd +++ b/man/SplotchParams.Rd @@ -40,6 +40,22 @@ The Splotch simulation uses the following parameters: structure.} } } + \item{\emph{Library size parameters}}{ + \describe{ + \item{\code{lib.loc}}{Location (meanlog) parameter for the + library size log-normal distribution, or mean parameter if a + normal distribution is used.} + \item{\code{lib.scale}}{Scale (sdlog) parameter for the library + size log-normal distribution, or sd parameter if a normal + distribution is used.} + } + } + \item{\emph{Paths parameters}}{ + \describe{ + \item{\code{[cells.design]}}{data.frame describing cell + structure.} + } + } } The parameters not shown in brackets can be estimated from real data using -- GitLab