diff --git a/R/lun-simulate.R b/R/lun-simulate.R index abeafcb3d851a0d972bdc09c8ff2fce39de1b0f3..8ced43ea7aa7cbc1026282d84f43633961b5e21d 100644 --- a/R/lun-simulate.R +++ b/R/lun-simulate.R @@ -63,6 +63,9 @@ lunSimulate <- function(params = newLunParams(), verbose = TRUE, ...) { de.upFC <- getParam(params, "de.upFC") de.downFC <- getParam(params, "de.downFC") + cell.names <- paste0("Cell", seq_len(nCells)) + gene.names <- paste0("Gene", seq_len(nGenes)) + if (verbose) {message("Simulating means...")} gene.means <- rgamma(nGenes, shape = mean.shape, rate = mean.rate) @@ -99,6 +102,8 @@ lunSimulate <- function(params = newLunParams(), verbose = TRUE, ...) { cell.facs <- unlist(cell.facs) groups <- unlist(groups) } + colnames(cell.means) <- cell.names + rownames(cell.means) <- gene.names if (verbose) {message("Simulating counts...")} counts <- matrix(rnbinom(nGenes * nCells, mu = cell.means, @@ -106,32 +111,31 @@ lunSimulate <- function(params = newLunParams(), verbose = TRUE, ...) { nrow = nGenes, ncol = nCells) if (verbose) {message("Creating final SCESet...")} - cell.names <- paste0("Cell", seq_len(nCells)) - gene.names <- paste0("Gene", seq_len(nGenes)) rownames(counts) <- gene.names colnames(counts) <- cell.names - phenos <- new("AnnotatedDataFrame", - data = data.frame(Cell = cell.names, CellFac = cell.facs)) - rownames(phenos) <- cell.names - features <- new("AnnotatedDataFrame", - data = data.frame(Gene = gene.names, GeneMean = gene.means)) - rownames(features) <- gene.names - sim <- newSCESet(countData = counts, phenoData = phenos, - featureData = features) + cells <- data.frame(Cell = cell.names, CellFac = cell.facs) + rownames(cells) <- cell.names - colnames(cell.means) <- cell.names - rownames(cell.means) <- gene.names - set_exprs(sim, "CellMeans") <- cell.means + features <- data.frame(Gene = gene.names, GeneMean = gene.means) + rownames(features) <- gene.names if (nGroups > 1) { - pData(sim)$Group <- groups + cells$Group <- groups for (idx in seq_along(de.facs)) { - fData(sim)[[paste0("DEFacGroup", idx)]] <- de.facs[[idx]] - fData(sim)[[paste0("GeneMeanGroup", idx)]] <- gene.means * + features[[paste0("DEFacGroup", idx)]] <- de.facs[[idx]] + features[[paste0("GeneMeanGroup", idx)]] <- gene.means * de.facs[[idx]] } } + sim <- SingleCellExperiment(assays = list(counts = counts, + CellMeans = cell.means), + rowData = features, + colData = cells, + metadata = list(params = params)) + + if (verbose) {message("Done!")} + return(sim) } diff --git a/R/lun2-simulate.R b/R/lun2-simulate.R index bc8d9b696ad8eeb0086c664ca8aa2028d281cd03..439e3c60ae6353b7d861a31b3627e4e74c638ee8 100644 --- a/R/lun2-simulate.R +++ b/R/lun2-simulate.R @@ -91,31 +91,13 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE, de.nGenes <- getParam(params, "de.nGenes") - #if (name == "nGenes") { - # old.nGenes <- getParam(object, "nGenes") - # if (value != old.nGenes) { - # warning("nGenes has been changed. Gene parameter vectors will be ", - # "sampled to length new nGenes.") - # selected <- sample(seq_len(old.nGenes), size = value, - # replace = TRUE) - # for (parameter in grep("gene", slotNames(object), value = TRUE)) { - # old.value <- getParam(object, parameter) - # object <- setParamUnchecked(object, parameter, - # old.value[selected]) - # } - # } - #} - - # Set up objects to store intermediate values cell.names <- paste0("Cell", seq_len(nCells)) gene.names <- paste0("Gene", seq_len(nGenes)) - features <- new("AnnotatedDataFrame", - data = data.frame(Gene = gene.names, GeneMean = gene.means, - GeneDisp = gene.disps)) - phenos <- new("AnnotatedDataFrame", - data = data.frame(Cell = cell.names, Plate = cell.plates)) + features <- data.frame(Gene = gene.names, GeneMean = gene.means, + GeneDisp = gene.disps) + cells <- data.frame(Cell = cell.names, Plate = cell.plates) if (zinb) { features$GeneZeroProp <- gene.ziProps @@ -166,7 +148,7 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE, if (verbose) {message("Simulating libray size factors...")} lib.facs <- lib.sizes / mean(lib.sizes) lib.facs <- sample(lib.facs, nCells, replace = TRUE) * lib.mod - phenos$LibSizeFac <- lib.facs + cells$LibSizeFac <- lib.facs if (verbose) {message("Simulating cell means...")} cell.means <- plate.means[, as.integer(cell.plates)] @@ -187,25 +169,26 @@ lun2Simulate <- function(params = newLun2Params(), zinb = FALSE, if (verbose) {message("Creating final SCESet...")} - rownames(phenos) <- cell.names + rownames(cells) <- cell.names rownames(features) <- gene.names rownames(counts) <- gene.names colnames(counts) <- cell.names - sim <- newSCESet(countData = counts, phenoData = phenos, - featureData = features) - rownames(cell.means) <- gene.names colnames(cell.means) <- cell.names - set_exprs(sim, "CellMeans") <- cell.means - rownames(true.counts) <- gene.names colnames(true.counts) <- cell.names - set_exprs(sim, "TrueCounts") <- true.counts + + sim <- SingleCellExperiment(assays = list(counts = counts, + CellMeans = cell.means, + TrueCounts <- true.counts), + rowData = features, + colData = cells, + metadata = list(params = params)) if (zinb) { rownames(is.zero) <- gene.names colnames(is.zero) <- cell.names - set_exprs(sim, "ZeroInflation") <- is.zero + assays(sim)$ZeroInflation <- is.zero } if (verbose) {message("Done!")} diff --git a/R/scDD-simulate.R b/R/scDD-simulate.R index 478443f68054c276c49d75e9418c93e18b9d4046..b3d949827160a8449ff90cf2b458d01323d5f16b 100644 --- a/R/scDD-simulate.R +++ b/R/scDD-simulate.R @@ -100,16 +100,19 @@ scDDSimulate <- function(params = newSCDDParams(), plots = FALSE, rownames(counts) <- gene.names colnames(counts) <- cell.names - phenos <- new("AnnotatedDataFrame", - data = data.frame(Cell = cell.names, - Condition = rep(1:2, each = nCells))) - rownames(phenos) <- cell.names - features <- new("AnnotatedDataFrame", - data = data.frame(Gene = gene.names, DEStatus = de.status, - FoldChange = foldchanges)) + + cells <- data.frame(Cell = cell.names, + Condition = rep(1:2, each = nCells)) + rownames(cells) <- cell.names + + features <- data.frame(Gene = gene.names, DEStatus = de.status, + FoldChange = foldchanges) rownames(features) <- gene.names - sim <- newSCESet(countData = counts, phenoData = phenos, - featureData = features) + + sim <- SingleCellExperiment(assays = list(counts = counts), + rowData = features, + colData = cells, + metadata = list(params = params)) if (verbose) {message("Done!")} diff --git a/R/simple-simulate.R b/R/simple-simulate.R index e52b216440bb1f567fdaa006e247f885e18361fd..add68255c0f3a9e8b91a283cb88518e4704254d1 100644 --- a/R/simple-simulate.R +++ b/R/simple-simulate.R @@ -53,13 +53,15 @@ simpleSimulate <- function(params = newSimpleParams(), verbose = TRUE, ...) { rownames(counts) <- gene.names colnames(counts) <- cell.names - phenos <- new("AnnotatedDataFrame", data = data.frame(Cell = cell.names)) - rownames(phenos) <- cell.names - features <- new("AnnotatedDataFrame", - data = data.frame(Gene = gene.names, GeneMean = means)) + cells <- data.frame(Cell = cell.names) + rownames(cells) <- cell.names + features <- data.frame(Gene = gene.names, GeneMean = means) rownames(features) <- gene.names - sim <- newSCESet(countData = counts, phenoData = phenos, - featureData = features) + + sim <- SingleCellExperiment(assays = list(counts = counts), + rowData = features, + colData = cells, + metadata = list(params = params)) return(sim) } diff --git a/R/splat-simulate.R b/R/splat-simulate.R index 5256804ea4ee0dc210b9d3cfe502bb28ab041211..7967e701b10e6021acd6196b316a0222d730fa10 100644 --- a/R/splat-simulate.R +++ b/R/splat-simulate.R @@ -159,27 +159,24 @@ splatSimulate <- function(params = newSplatParams(), } # Create SCESet with dummy counts to store simulation - dummy.counts <- matrix(1, ncol = nCells, nrow = nGenes) - rownames(dummy.counts) <- gene.names - colnames(dummy.counts) <- cell.names - phenos <- new("AnnotatedDataFrame", data = data.frame(Cell = cell.names)) - rownames(phenos) <- cell.names - features <- new("AnnotatedDataFrame", data = data.frame(Gene = gene.names)) + cells <- data.frame(Cell = cell.names) + rownames(cells) <- cell.names + features <- data.frame(Gene = gene.names) rownames(features) <- gene.names - sim <- newSCESet(countData = dummy.counts, phenoData = phenos, - featureData = features) + sim <- SingleCellExperiment(rowData = features, colData = cells, + metadata = list(params = params)) # Make batches vector which is the index of param$batchCells repeated # params$batchCells[index] times batches <- lapply(seq_len(nBatches), function(i, b) {rep(i, b[i])}, b = batch.cells) batches <- unlist(batches) - pData(sim)$Batch <- batch.names[batches] + colData(sim)$Batch <- batch.names[batches] if (method != "single") { groups <- sample(seq_len(nGroups), nCells, prob = group.prob, replace = TRUE) - pData(sim)$Group <- group.names[groups] + colData(sim)$Group <- group.names[groups] } if (verbose) {message("Simulating library sizes...")} @@ -211,21 +208,8 @@ splatSimulate <- function(params = newSplatParams(), if (verbose) {message("Simulating dropout (if needed)...")} sim <- splatSimDropout(sim, params) - if (verbose) {message("Creating final SCESet...")} - # Create new SCESet to make sure values are calculated correctly - sce <- newSCESet(countData = counts(sim), - phenoData = new("AnnotatedDataFrame", data = pData(sim)), - featureData = new("AnnotatedDataFrame", data = fData(sim))) - - # Add intermediate matrices stored in assayData - for (assay.name in names(assayData(sim))) { - if (!(assay.name %in% names(assayData(sce)))) { - set_exprs(sce, assay.name) <- get_exprs(sim, assay.name) - } - } - if (verbose) {message("Done!")} - return(sce) + return(sim) } #' @rdname splatSimulate @@ -272,7 +256,7 @@ splatSimLibSizes <- function(sim, params) { lib.scale <- getParam(params, "lib.scale") exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) - pData(sim)$ExpLibSize <- exp.lib.sizes + colData(sim)$ExpLibSize <- exp.lib.sizes return(sim) } @@ -311,9 +295,9 @@ splatSimGeneMeans <- function(sim, params) { means.gene <- base.means.gene means.gene[is.outlier] <- outlier.means[is.outlier] - fData(sim)$BaseGeneMean <- base.means.gene - fData(sim)$OutlierFactor <- outlier.facs - fData(sim)$GeneMean <- means.gene + rowData(sim)$BaseGeneMean <- base.means.gene + rowData(sim)$OutlierFactor <- outlier.facs + rowData(sim)$GeneMean <- means.gene return(sim) } @@ -336,13 +320,13 @@ splatSimBatchEffects <- function(sim, params) { nBatches <- getParam(params, "nBatches") batch.facLoc <- getParam(params, "batch.facLoc") batch.facScale <- getParam(params, "batch.facScale") - means.gene <- fData(sim)$GeneMean + means.gene <- rowData(sim)$GeneMean for (idx in seq_len(nBatches)) { batch.facs <- getLNormFactors(nGenes, 1, 0.5, batch.facLoc[idx], batch.facScale[idx]) batch.means.gene <- means.gene * batch.facs - fData(sim)[[paste0("BatchFacBatch", idx)]] <- batch.facs + rowData(sim)[[paste0("BatchFacBatch", idx)]] <- batch.facs } return(sim) @@ -362,15 +346,15 @@ splatSimBatchEffects <- function(sim, params) { splatSimBatchCellMeans <- function(sim, params) { nBatches <- getParam(params, "nBatches") - cell.names <- pData(sim)$Cell - gene.names <- fData(sim)$Gene - gene.means <- fData(sim)$GeneMean + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + gene.means <- rowData(sim)$GeneMean if (nBatches > 1) { - batches <- pData(sim)$Batch + batches <- colData(sim)$Batch batch.names <- unique(batches) - batch.facs.gene <- fData(sim)[, paste0("BatchFac", batch.names)] + batch.facs.gene <- rowData(sim)[, paste0("BatchFac", batch.names)] batch.facs.cell <- as.matrix(batch.facs.gene[, factor(batches)]) } else { @@ -384,7 +368,7 @@ splatSimBatchCellMeans <- function(sim, params) { colnames(batch.means.cell) <- cell.names rownames(batch.means.cell) <- gene.names - set_exprs(sim, "BatchCellMeans") <- batch.means.cell + assays(sim)$BatchCellMeans <- batch.means.cell return(sim) } @@ -414,13 +398,13 @@ splatSimGroupDE <- function(sim, params) { de.downProb <- getParam(params, "de.downProb") de.facLoc <- getParam(params, "de.facLoc") de.facScale <- getParam(params, "de.facScale") - means.gene <- fData(sim)$GeneMean + means.gene <- rowData(sim)$GeneMean for (idx in seq_len(nGroups)) { de.facs <- getLNormFactors(nGenes, de.prob[idx], de.downProb[idx], de.facLoc[idx], de.facScale[idx]) group.means.gene <- means.gene * de.facs - fData(sim)[[paste0("DEFacGroup", idx)]] <- de.facs + rowData(sim)[[paste0("DEFacGroup", idx)]] <- de.facs } return(sim) @@ -441,14 +425,14 @@ splatSimPathDE <- function(sim, params) { for (path in path.order) { from <- path.from[path] if (from == 0) { - means.gene <- fData(sim)$GeneMean + means.gene <- rowData(sim)$GeneMean } else { - means.gene <- fData(sim)[[paste0("GeneMeanPath", from)]] + means.gene <- rowData(sim)[[paste0("GeneMeanPath", from)]] } de.facs <- getLNormFactors(nGenes, de.prob[path], de.downProb[path], de.facLoc[path], de.facScale[path]) path.means.gene <- means.gene * de.facs - fData(sim)[[paste0("DEFacPath", path)]] <- de.facs + rowData(sim)[[paste0("DEFacPath", path)]] <- de.facs } return(sim) @@ -476,10 +460,10 @@ NULL splatSimSingleCellMeans <- function(sim, params) { nCells <- getParam(params, "nCells") - cell.names <- pData(sim)$Cell - gene.names <- fData(sim)$Gene - exp.lib.sizes <- pData(sim)$ExpLibSize - batch.means.cell <- get_exprs(sim, "BatchCellMeans") + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + exp.lib.sizes <- colData(sim)$ExpLibSize + batch.means.cell <- assays(sim)$BatchCellMeans cell.means.gene <- batch.means.cell cell.props.gene <- t(t(cell.means.gene) / colSums(cell.means.gene)) @@ -487,7 +471,7 @@ splatSimSingleCellMeans <- function(sim, params) { colnames(base.means.cell) <- cell.names rownames(base.means.cell) <- gene.names - set_exprs(sim, "BaseCellMeans") <- base.means.cell + assays(sim)$BaseCellMeans <- base.means.cell return(sim) } @@ -498,22 +482,22 @@ splatSimSingleCellMeans <- function(sim, params) { splatSimGroupCellMeans <- function(sim, params) { nGroups <- getParam(params, "nGroups") - cell.names <- pData(sim)$Cell - gene.names <- fData(sim)$Gene - groups <- pData(sim)$Group + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + groups <- colData(sim)$Group group.names <- sort(unique(groups)) - exp.lib.sizes <- pData(sim)$ExpLibSize - batch.means.cell <- get_exprs(sim, "BatchCellMeans") + exp.lib.sizes <- colData(sim)$ExpLibSize + batch.means.cell <- assays(sim)$BatchCellMeans - group.facs.gene <- fData(sim)[, paste0("DEFac", group.names)] - cell.facs.gene <- as.matrix(group.facs.gene[, factor(groups)]) + group.facs.gene <- rowData(sim)[, paste0("DEFac", group.names)] + cell.facs.gene <- as.matrix(group.facs.gene[, paste0("DEFac", groups)]) cell.means.gene <- batch.means.cell * cell.facs.gene cell.props.gene <- t(t(cell.means.gene) / colSums(cell.means.gene)) base.means.cell <- t(t(cell.props.gene) * exp.lib.sizes) colnames(base.means.cell) <- cell.names rownames(base.means.cell) <- gene.names - set_exprs(sim, "BaseCellMeans") <- base.means.cell + assays(sim)$BaseCellMeans <- base.means.cell return(sim) } @@ -527,16 +511,16 @@ splatSimPathCellMeans <- function(sim, params) { nGenes <- getParam(params, "nGenes") nCells <- getParam(params, "nCells") nGroups <- getParam(params, "nGroups") - cell.names <- pData(sim)$Cell - gene.names <- fData(sim)$Gene + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene path.from <- getParam(params, "path.from") path.length <- getParam(params, "path.length") path.skew <- getParam(params, "path.skew") path.nonlinearProb <- getParam(params, "path.nonlinearProb") path.sigmaFac <- getParam(params, "path.sigmaFac") - groups <- pData(sim)$Group - exp.lib.sizes <- pData(sim)$ExpLibSize - batch.means.cell <- get_exprs(sim, "BatchCellMeans") + groups <- colData(sim)$Group + exp.lib.sizes <- colData(sim)$ExpLibSize + batch.means.cell <- assays(sim)$BatchCellMeans group.sizes <- table(groups) @@ -546,7 +530,7 @@ splatSimPathCellMeans <- function(sim, params) { is.nonlinear <- as.logical(rbinom(nGenes, 1, path.nonlinearProb)) sigma.facs <- rep(0, nGenes) sigma.facs[is.nonlinear] <- path.sigmaFac - fData(sim)[[paste0("SigmaFacPath", idx)]] <- sigma.facs + rowData(sim)[[paste0("SigmaFacPath", idx)]] <- sigma.facs } # Generate non-linear path factors @@ -555,7 +539,7 @@ splatSimPathCellMeans <- function(sim, params) { is.nonlinear <- as.logical(rbinom(nGenes, 1, path.nonlinearProb)) sigma.facs <- rep(0, nGenes) sigma.facs[is.nonlinear] <- path.sigmaFac - fData(sim)[[paste0("SigmaFacPath", idx)]] <- sigma.facs + rowData(sim)[[paste0("SigmaFacPath", idx)]] <- sigma.facs } # Generate paths. Each path is a matrix with path.length columns and @@ -566,13 +550,13 @@ splatSimPathCellMeans <- function(sim, params) { if (from == 0) { facs.start <- rep(1, nGenes) } else { - facs.start <- fData(sim)[[paste0("DEFacPath", from)]] + facs.start <- rowData(sim)[[paste0("DEFacPath", from)]] } # Find the factors at the end position - facs.end <- fData(sim)[[paste0("DEFacPath", idx)]] + facs.end <- rowData(sim)[[paste0("DEFacPath", idx)]] # Get the non-linear factors - sigma.facs <- fData(sim)[[paste0("SigmaFacPath", idx)]] + sigma.facs <- rowData(sim)[[paste0("SigmaFacPath", idx)]] # Build Brownian bridges from start to end steps <- buildBridges(facs.start, facs.end, n = path.length[idx], @@ -609,9 +593,8 @@ splatSimPathCellMeans <- function(sim, params) { colnames(base.means.cell) <- cell.names rownames(base.means.cell) <- gene.names - #pData(sim)$Step <- unlist(cell.steps) - pData(sim)$Step <- steps - set_exprs(sim, "BaseCellMeans") <- base.means.cell + colData(sim)$Step <- steps + assays(sim)$BaseCellMeans <- base.means.cell return(sim) } @@ -632,13 +615,13 @@ splatSimPathCellMeans <- function(sim, params) { #' @importFrom stats rchisq rgamma splatSimBCVMeans <- function(sim, params) { - cell.names <- pData(sim)$Cell - gene.names <- fData(sim)$Gene + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene nGenes <- getParam(params, "nGenes") nCells <- getParam(params, "nCells") bcv.common <- getParam(params, "bcv.common") bcv.df <- getParam(params, "bcv.df") - base.means.cell <- get_exprs(sim, "BaseCellMeans") + base.means.cell <- assays(sim)$BaseCellMeans if (is.finite(bcv.df)) { bcv <- (bcv.common + (1 / sqrt(base.means.cell))) * @@ -655,8 +638,8 @@ splatSimBCVMeans <- function(sim, params) { colnames(means.cell) <- cell.names rownames(means.cell) <- gene.names - set_exprs(sim, "BCV") <- bcv - set_exprs(sim, "CellMeans") <- means.cell + assays(sim)$BCV <- bcv + assays(sim)$CellMeans <- means.cell return(sim) } @@ -677,11 +660,11 @@ splatSimBCVMeans <- function(sim, params) { #' @importFrom stats rpois splatSimTrueCounts <- function(sim, params) { - cell.names <- pData(sim)$Cell - gene.names <- fData(sim)$Gene + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene nGenes <- getParam(params, "nGenes") nCells <- getParam(params, "nCells") - cell.means <- get_exprs(sim, "CellMeans") + cell.means <- assays(sim)$CellMeans true.counts <- matrix(rpois(nGenes * nCells, lambda = cell.means), nrow = nGenes, ncol = nCells) @@ -689,7 +672,7 @@ splatSimTrueCounts <- function(sim, params) { colnames(true.counts) <- cell.names rownames(true.counts) <- gene.names - set_exprs(sim, "TrueCounts") <- true.counts + assays(sim)$TrueCounts <- true.counts return(sim) } @@ -712,11 +695,11 @@ splatSimTrueCounts <- function(sim, params) { splatSimDropout <- function(sim, params) { dropout.present <- getParam(params, "dropout.present") - true.counts <- get_exprs(sim, "TrueCounts") + true.counts <- assays(sim)$TrueCounts if (dropout.present) { - cell.names <- pData(sim)$Cell - gene.names <- fData(sim)$Gene + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene nCells <- getParam(params, "nCells") nGenes <- getParam(params, "nGenes") dropout.mid <- getParam(params, "dropout.mid") @@ -740,13 +723,13 @@ splatSimDropout <- function(sim, params) { colnames(keep) <- cell.names rownames(keep) <- gene.names - set_exprs(sim, "DropProb") <- drop.prob - set_exprs(sim, "Dropout") <- !keep + assays(sim)$DropProb <- drop.prob + assays(sim)$Dropout <- !keep } else { counts <- true.counts } - scater::counts(sim) <- counts + counts(sim) <- counts return(sim) }