diff --git a/DESCRIPTION b/DESCRIPTION index 0db4d2baa9c6a3cd125ff05e7b9fd6c9ce5c4f59..25f5b91c8125e8cf30942399b4941e4eafac8290 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 1.9.3.9004 +Version: 1.9.3.9005 Date: 2019-08-08 Author: Luke Zappia Authors@R: diff --git a/NEWS.md b/NEWS.md index 22109a01d816de25b9654a86859ca92513a598b2..afaeb753eb4c69f6b32b932742cbf61c161dc72e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +### Version 1.9.3.9005(2019-08-08) + +* Simulate counts +* Split into separate functions for each stage + ### Version 1.9.3.9004 (2019-08-08) * Merge master into splotch branch diff --git a/R/SplotchParams-methods.R b/R/SplotchParams-methods.R index 263e384a0b9718ca6e3678d148d977eaa5e152bd..b673e030f68505cb73c7d507462cdeeba4194090 100644 --- a/R/SplotchParams-methods.R +++ b/R/SplotchParams-methods.R @@ -17,7 +17,7 @@ setValidity("SplotchParams", function(object) { v <- getParams(object, slotNames(object)) - checks <- c(#nGenes = checkmate::checkInt(v$nGenes, lower = 1), + checks <- c(nGenes = checkmate::checkInt(v$nGenes, lower = 1), nCells = checkmate::checkInt(v$nCells, lower = 1), seed = checkmate::checkInt(v$seed, lower = 0), mean.rate = checkmate::checkNumber(v$mean.rate, lower = 0), @@ -163,7 +163,8 @@ setMethod("show", "SplotchParams", function(object) { if (length(paths.means) != 0) { cat(crayon::bgYellow(crayon::bold(crayon::blue("[MEANS]\n")))) cat(crayon::bold(crayon::green(paste( - "List of", length(paths.means), "matrices\n\n" + "List of", length(paths.means), "matrices with names:", + paste(names(paths.means), collapse = ", "), "\n\n" )))) } diff --git a/R/splotch-simulate.R b/R/splotch-simulate.R index 9bf54b9974f17cdf874bbb950a3077fe3b82c3c4..ad3f1a132dc821c3588c5741f26eac6f1b462b15 100644 --- a/R/splotch-simulate.R +++ b/R/splotch-simulate.R @@ -16,9 +16,16 @@ #' #' @export #' @importFrom SingleCellExperiment SingleCellExperiment -#' @importFrom stats dbeta splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { + params <- splotchSetup(params, verbose, ...) + sim <- splotchSample(params, verbose) + + return(sim) +} + +splotchSetup <- function(params = newSplotchParams(), verbose = TRUE, ...) { + checkmate::assertClass(params, "SplotchParams") params <- setParams(params, ...) @@ -26,39 +33,139 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { seed <- getParam(params, "seed") set.seed(seed) - # Get the parameters we are going to use + if (verbose) {message("Setting up parameters...")} + params <- splotchGenNetwork(params, verbose) + params <- splotchSelectRegs(params, verbose) + params <- splotchSimGeneMeans(params, verbose) + params <- splotchSimPaths(params, verbose) + + return(params) +} + +splotchSample <- function(params, verbose = TRUE) { + + # Check that parameters are set up + checkmate::assertClass(params, "SplotchParams") + network.graph <- getParam(params, "network.graph") + if (is.null(network.graph)) { + stop("'network.graph' not set, run splotchSetup first") + } + network.regsSet <- getParam(params, "network.regsSet") + if (!network.regsSet) { + stop("network regulators not set, run splotchSetup first") + } + mean.values <- getParam(params, "mean.values") + if (length(mean.values) == 0) { + stop("'mean.values' not set, run splotchSetup first") + } + paths.means <- getParam(params, "paths.means") + if (length(mean.values) == 0) { + stop("'paths.means' not set, run splotchSetup first") + } + + if (verbose) {message("Creating simulation object...")} + nGenes <- getParam(params, "nGenes") + nCells <- getParam(params, "nCells") + cell.names <- paste0("Cell", seq_len(nCells)) + gene.names <- paste0("Gene", seq_len(nGenes)) + + cells <- data.frame(Cell = cell.names, row.names = cell.names) + features <- data.frame(Gene = gene.names, row.names = gene.names) + sim <- SingleCellExperiment(rowData = features, colData = cells, + metadata = list(Params = params)) + + sim <- splotchSimLibSizes(sim, params, verbose) + sim <- splotchSimCellMeans(sim, params, verbose) + sim <- splotchSimCounts(sim, params, verbose) + + return(sim) + +} + +splotchGenNetwork <- function(params, verbose) { + nGenes <- getParam(params, "nGenes") network.graph <- getParam(params, "network.graph") - # Generate network - if (is.null(network.graph)) { - network.graph <- generateNetwork(nGenes, verbose) - params <- setParam(params, "network.graph", network.graph) + if (!is.null(network.graph)) { + if (verbose) {message("Using provided gene network...")} + return(params) } - # Select regulators - if (!getParam(params, "network.regsSet")) { - network.nRegs <- getParam(params, "network.nRegs") - network.graph <- selectRegulators(network.graph, network.nRegs, - verbose) - params <- setParam(params, "network.graph", network.graph) - } else { + if (verbose) {message("Generating gene network...")} + + graph.raw <- igraph::sample_forestfire(nGenes, 0.1) + graph.data <- igraph::get.data.frame(graph.raw) + graph.data <- graph.data[, c("from", "to")] + graph.data$weight <- rnorm(nrow(graph.data)) + graph <- igraph::graph.data.frame(graph.data) + + params <- setParam(params, "network.graph", graph) + + return(params) +} + +splotchSelectRegs <- function(params, verbose) { + + network.regsSet <- getParam(params, "network.regsSet") + + if (network.regsSet) { if (verbose) {message("Using selected regulators...")} + return(params) } + if (verbose) {message("Selecting regulators...")} + network.nRegs <- getParam(params, "network.nRegs") + network.graph <- getParam(params, "network.graph") + + out.degree <- igraph::degree(network.graph, mode = "out") + in.degree <- igraph::degree(network.graph, mode = "in") + reg.prob <- out.degree - in.degree + reg.prob <- reg.prob + rnorm(length(reg.prob)) + reg.prob[reg.prob <= 0] <- 1e-10 + reg.prob <- reg.prob / sum(reg.prob) + reg.nodes <- names(rev(sort(reg.prob))[1:network.nRegs]) + is.reg <- igraph::V(network.graph)$name %in% reg.nodes + network.graph <- igraph::set_vertex_attr(network.graph, "IsReg", + value = is.reg) + + params <- setParam(params, "network.graph", network.graph) + + return(params) +} + +splotchSimGeneMeans <- function(params, verbose) { + + mean.values <- getParam(params, "mean.values") + # Generate means - if (length(getParam(params, "mean.values")) == 0) { - if (verbose) {message("Simulating means...")} - mean.shape <- getParam(params, "mean.shape") - mean.rate <- getParam(params, "mean.rate") - mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate) - params <- setParam(params, "mean.values", mean.values) - } else { + if (length(mean.values) != 0) { if (verbose) {message("Using defined means...")} + return(params) + } + + if (verbose) {message("Simulating means...")} + nGenes <- getParam(params, "nGenes") + mean.shape <- getParam(params, "mean.shape") + mean.rate <- getParam(params, "mean.rate") + + mean.values <- rgamma(nGenes, shape = mean.shape, rate = mean.rate) + params <- setParam(params, "mean.values", mean.values) + + return(params) +} + +splotchSimPaths <- function(params, verbose) { + + paths.means <- getParam(params, "paths.means") + + if (length(paths.means) != 0) { + if (verbose) {message("Using defined path means...")} + return(params) } - # Generate paths if (verbose) {message("Simulating paths...")} + nGenes <- getParam(params, "nGenes") paths.design <- getParam(params, "paths.design") network.graph <- getParam(params, "network.graph") network.weights <- igraph::as_adjacency_matrix(network.graph, @@ -66,6 +173,7 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { network.nRegs <- getParam(params, "network.nRegs") network.isReg <- igraph::vertex_attr(network.graph, "IsReg") paths.nPrograms <- getParam(params, "paths.nPrograms") + programs.weights <- matrix(rnorm(network.nRegs * paths.nPrograms), nrow = network.nRegs, ncol = paths.nPrograms) paths.changes <- vector("list", nrow(paths.design)) @@ -74,7 +182,7 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { paths.graph <- igraph::graph_from_data_frame(paths.design) paths.order <- names(igraph::topo_sort(paths.graph, mode = "in")) paths.order <- as.numeric(paths.order) - # The origin is not a path + # Remove the origin because it is not a path paths.order <- paths.order[paths.order != 0] for (path in paths.order) { @@ -89,7 +197,7 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { } for (step in seq_len(nSteps) + 1) { - programs.changes <- rnorm(paths.nPrograms) + programs.changes <- rnorm(paths.nPrograms, sd = 0.01) reg.changes <- as.vector(programs.weights %*% programs.changes) changes[network.isReg, step] <- reg.changes change <- as.vector(changes[, step - 1] %*% network.weights) @@ -100,50 +208,58 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { changes <- changes[, 1:nSteps] factors <- matrixStats::rowCumsums(changes) } else { - changes <- changes[, 2:nSteps + 1] + changes <- changes[, 2:(nSteps + 1)] from.factors <- paths.factors[[from]][, ncol(paths.factors[[from]])] factors <- matrixStats::rowCumsums(changes) + from.factors } paths.changes[[path]] <- changes paths.factors[[path]] <- factors } - means.values <- getParam(params, "mean.values") + + mean.values <- getParam(params, "mean.values") paths.means <- lapply(paths.factors, function(x) { - 2 ^ x * means.values + (2 ^ x) * mean.values }) + names(paths.means) <- paste0("Path", paths.design$Path) params <- setParam(params, "paths.means", paths.means) + return(params) +} + +splotchSimLibSizes <- function(sim, params, verbose) { + if (verbose) {message("Simulating library sizes...")} nCells <- getParam(params, "nCells") lib.loc <- getParam(params, "lib.loc") lib.scale <- getParam(params, "lib.scale") - lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) - if (verbose) {message("Simulating cell means...")} + exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) + colData(sim)$ExpLibSize <- exp.lib.sizes + + return(sim) +} + +#' @importFrom stats dbeta +splotchSimCellMeans <- function(sim, params, verbose) { + + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene nCells <- getParam(params, "nCells") cells.design <- getParam(params, "cells.design") + paths.design <- getParam(params, "paths.design") + paths.means <- getParam(params, "paths.means") + exp.lib.sizes <- colData(sim)$ExpLibSize + + if (verbose) {message("Assigning cells to paths...")} cells.paths <- sample(cells.design$Path, nCells, replace = TRUE, prob = cells.design$Probability) - paths.design <- getParam(params, "paths.design") + + if (verbose) {message("Assigning cells to steps...")} paths.cells.design <- merge(paths.design, cells.design) steps.probs <- apply(paths.cells.design, 1, function(path) { steps <- path["Steps"] - dens <- dbeta(seq(0, 1, length.out = steps), - path["Alpha"], path["Beta"]) - # Adjust for infinite values at edge of distribution - dens.inf <- !is.finite(dens) - if (any(dens.inf) && all(dens[!dens.inf] == 0)) { - dens[dens.inf] <- 1 - } - if (!is.finite(dens[1])) { - dens[1] <- 1.1 * dens[2] - } - if (!is.finite(dens[steps])) { - dens[steps] <- 1.1 * dens[steps - 1] - } - - probs <- dens / sum(dens) + probs <- getBetaStepProbs(path["Steps"], path["Alpha"], path["Beta"]) # Return a list to avoid getting a matrix if all path lengths are equal return(list(probs)) @@ -151,11 +267,14 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { # Remove unnecessary list level steps.probs <- lapply(steps.probs, "[[", 1) names(steps.probs) <- paths.cells.design$Path + cells.steps <- sapply(cells.paths, function(path) { probs <- steps.probs[[path]] step <- sample(1:length(probs), 1, prob = probs) return(step) }) + + if (verbose) {message("Simulating cell means...")} cells.means <- sapply(seq_len(nCells), function(cell) { path <- cells.paths[cell] step <- cells.steps[cell] @@ -163,44 +282,57 @@ splotchSimulate <- function(params = newSplotchParams(), verbose = TRUE, ...) { return(means) }) - # sim <- SingleCellExperiment(assays = list(counts = counts), - # rowData = features, - # colData = cells, - # metadata = list(params = params)) - # - # return(sim) + # Adjust mean based on library size + cells.props <- t(t(cells.means) / colSums(cells.means)) + cells.means <- t(t(cells.props) * exp.lib.sizes) - return(params) + colnames(cells.means) <- cell.names + rownames(cells.means) <- gene.names + + colData(sim)$Path <- cells.paths + colData(sim)$Step <- cells.steps + assays(sim)$CellMeans <- cells.means + + return(sim) } -generateNetwork <- function(n.nodes, verbose) { +splotchSimCounts <- function(sim, params, verbose) { - if (verbose) {message("Generating gene network...")} + if (verbose) {message("Simulating counts...")} + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + nGenes <- getParam(params, "nGenes") + nCells <- getParam(params, "nCells") + cells.means <- assays(sim)$CellMeans - graph.raw <- igraph::sample_forestfire(n.nodes, 0.1) - graph.data <- igraph::get.data.frame(graph.raw) - graph.data <- graph.data[, c("from", "to")] - graph.data$weight <- rnorm(nrow(graph.data)) - graph <- igraph::graph.data.frame(graph.data) - graph <- igraph::set_vertex_attr(graph, "mean", - value = rnorm(igraph::gorder(graph))) + true.counts <- matrix(rpois( + as.numeric(nGenes) * as.numeric(nCells), + lambda = cells.means), + nrow = nGenes, ncol = nCells) - return(graph) + colnames(true.counts) <- cell.names + rownames(true.counts) <- gene.names + assays(sim)$counts <- true.counts + + return(sim) } -selectRegulators <- function(graph, nReg, verbose) { +getBetaStepProbs <- function(steps, alpha, beta) { + dens <- dbeta(seq(0, 1, length.out = steps), alpha, beta) - if (verbose) {message("Selecting regulators...")} + # Adjust for infinite values at edge of distribution + dens.inf <- !is.finite(dens) + if (any(dens.inf) && all(dens[!dens.inf] == 0)) { + dens[dens.inf] <- 1 + } + if (!is.finite(dens[1])) { + dens[1] <- 1.1 * dens[2] + } + if (!is.finite(dens[steps])) { + dens[steps] <- 1.1 * dens[steps - 1] + } - out.degree <- igraph::degree(graph, mode = "out") - in.degree <- igraph::degree(graph, mode = "in") - reg.prob <- out.degree - in.degree - reg.prob <- reg.prob + rnorm(length(reg.prob)) - reg.prob[reg.prob <= 0] <- 1e-10 - reg.prob <- reg.prob / sum(reg.prob) - reg.nodes <- names(rev(sort(reg.prob))[1:nReg]) - is.reg <- igraph::V(graph)$name %in% reg.nodes - graph <- igraph::set_vertex_attr(graph, "IsReg", value = is.reg) + probs <- dens / sum(dens) - return(graph) + return(probs) }