diff --git a/.travis.yml b/.travis.yml index dbfba047994072d0aa4dfddef0317466be67c241..b405991321f9c3288f48e071911e38f31b0b2cb1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,9 @@ # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r language: R -r: bioc-release +r: + - bioc-devel + - bioc-release sudo: false cache: packages diff --git a/DESCRIPTION b/DESCRIPTION index 8852f51948ba007b2c6c414ae87ce6351f6540cd..530d696732d9a2b63f192c508f7407d239ff03b6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splatter Type: Package Title: Simple Simulation of Single-cell RNA Sequencing Data -Version: 0.99.8 -Date: 2017-01-23 +Version: 0.99.9 +Date: 2017-02-02 Author: Luke Zappia Authors@R: c(person("Luke", "Zappia", role = c("aut", "cre"), @@ -50,5 +50,5 @@ biocViews: SingleCell, RNASeq, Transcriptomics, GeneExpression, Sequencing, Software URL: https://github.com/Oshlack/splatter BugReports: https://github.com/Oshlack/splatter/issues -RoxygenNote: 5.0.1 +RoxygenNote: 6.0.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 0bfc62ec954c7c9f3d854b6de40e1955408efa87..b6d494ae6c6250912c1ccab26227328dc44d9374 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ S3method(simpleEstimate,SCESet) S3method(simpleEstimate,matrix) S3method(splatEstimate,SCESet) S3method(splatEstimate,matrix) +export(addGeneLengths) export(compareSCESets) export(getParam) export(getParams) diff --git a/NEWS.md b/NEWS.md index 8611fd50b5d4c21fd161d4f70749e1a88f0faf2a..43207e0429ec2c06975707360281973d7eab0fd5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# 0.99.9 + +* Add addGeneLengths function +* Update scDD reference + # 0.99.8 * Fix bug that meant non-linear path factors weren't stored in output diff --git a/R/SCESet-functions.R b/R/SCESet-functions.R index 38e52c7b2ca91c99cfef399f8acaedfd0099bbfb..63178257a4053fd8b9580561372e3f4b26048d88 100644 --- a/R/SCESet-functions.R +++ b/R/SCESet-functions.R @@ -66,4 +66,75 @@ 
addFeatureStats <- function(sce, value = c("counts", "cpm", "tpm", "fpkm"), fData(sce)[, mad.str] <- matrixStats::rowMads(values, na.rm = TRUE) return(sce) -} \ No newline at end of file +} + +#' Add gene lengths +#' +#' Add gene lengths to an SCESet object +#' +#' @param sce SCESet to add gene lengths to. +#' @param method Method to use for creating lengths. +#' @param loc Location parameter for the generate method. +#' @param scale Scale parameter for the generate method. +#' @param lengths Vector of lengths for the sample method. +#' +#' @details +#' This function adds simulated gene lengths to the \code{fData} slot of an +#' \code{SCESet} object that can be used for calculating length normalised +#' expression values such as TPM or FPKM. The \code{generate} method simulates +#' lengths using a (rounded) log-normal distribution, with the default +#' \code{loc} and \code{scale} parameters based on human coding genes. +#' Alternatively the \code{sample} method can be used which randomly samples +#' lengths (with replacement) from a supplied vector.
+#' +#' @return SCESet with added gene lengths +#' @examples +#' # Default generate method +#' sce <- simpleSimulate() +#' sce <- addGeneLengths(sce) +#' head(fData(sce)) +#' # Sample method (human coding genes) +#' \dontrun{ +#' library(TxDb.Hsapiens.UCSC.hg19.knownGene) +#' library(GenomicFeatures) +#' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene +#' tx.lens <- transcriptLengths(txdb, with.cds_len = TRUE) +#' tx.lens <- tx.lens[tx.lens$cds_len > 0, ] +#' gene.lens <- max(splitAsList(tx.lens$tx_len, tx.lens$gene_id)) +#' sce <- addGeneLengths(sce, method = "sample", lengths = gene.lens) +#' } +#' @export +#' @importFrom stats rlnorm +addGeneLengths <- function(sce, method = c("generate", "sample"), loc = 7.9, + scale = 0.7, lengths = NULL) { + + method <- match.arg(method) + checkmate::assertClass(sce, "SCESet") + checkmate::assertNumber(loc) + checkmate::assertNumber(scale, lower = 0) + checkmate::assertNumeric(lengths, lower = 0, null.ok = TRUE) + + switch(method, + generate = { + sim.lengths <- rlnorm(nrow(sce), meanlog = loc, sdlog = scale) + sim.lengths <- round(sim.lengths) + }, + sample = { + if (is.null(lengths)) { + stop("Lengths must be supplied to use the sample method.") + } + idx <- sample.int(length(lengths), nrow(sce), replace = TRUE) + sim.lengths <- lengths[idx] # sample() expands a single n to 1:n + } + ) + + fData(sce)$Length <- sim.lengths + + return(sce) +} + +#txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene +#tx.lens <- GenomicFeatures::transcriptLengths(txdb, with.cds_len = TRUE) +#tx.lens <- tx.lens[tx.lens$cds_len > 0, ] +#gene.lens <- max(IRanges::splitAsList(tx.lens$tx_len, tx.lens$gene_id)) +#lens <- rlnorm(length(gene.lens), meanlog = 7.9, sdlog = 0.7) diff --git a/R/listSims.R b/R/listSims.R index 3fe0c8184b51dfb9036686b83b2348b9e7b616a7..359e8f0ceca22ca0dd01e3a4eece273432ab1846 100644 --- a/R/listSims.R +++ b/R/listSims.R @@ -42,7 +42,8 @@ listSims <- function(print = TRUE) { Differential expression can be added between two groups of plates and optionally a
zero-inflated negative-binomial can be used."), - c("scDD", "scDD", "10.1101/035501", "kdkorthauer/scDD", + c("scDD", "scDD", "10.1186/s13059-016-1077-y", + "kdkorthauer/scDD", "The scDD simulation samples a given dataset and can simulate differentially expressed and differentially distributed genes between two conditions.")) @@ -70,4 +71,4 @@ listSims <- function(print = TRUE) { } invisible(sims.table) -} \ No newline at end of file +} diff --git a/R/scDD-simulate.R b/R/scDD-simulate.R index fe70aa680302987ce43503e857d626b72f47a30b..c91706bfdd0d9cc84277216bc35686060eba653f 100644 --- a/R/scDD-simulate.R +++ b/R/scDD-simulate.R @@ -19,11 +19,11 @@ #' @return SCESet containing simulated counts #' #' @references -#' Korthauer KD, Chu L-F, Newton MA, Li Y, Thomson J, Stewart R, et al. scDD: A +#' Korthauer KD, Chu L-F, Newton MA, Li Y, Thomson J, Stewart R, et al. A #' statistical approach for identifying differential distributions in -#' single-cell RNA-seq experiments. bioRxiv (2015). +#' single-cell RNA-seq experiments. Genome Biology (2016). #' -#' Paper: \url{dx.doi.org/10.1101/035501} +#' Paper: \url{https://doi.org/10.1186/s13059-016-1077-y} #' #' Code: \url{https://github.com/kdkorthauer/scDD} #' diff --git a/README.md b/README.md index 90505e847ab678b19b30b193624acd8c748be0e2..9f52c1d4a06edd89112669116ebd03b94cb9d30f 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@  Splatter is an R package for the simple simulation of single-cell RNA sequencing -data. Splatter provides a common interface that has: +data.
Splatter provides a common interface to multiple simulations that have: * Functions for estimating simulation parameters * Objects for storing those parameters @@ -20,45 +20,53 @@ Splatter is built on top of [`scater`][scater] and stores simulations in ### Development version -Splatter is currently under consideration for acceptance into Bioconductor -(you can follow the progress [here][contrib]) and hence requires the development -version of R (>=3.4) and the development version of Bioconductor (3.5). +Splatter has been accepted into the development version of [Bioconductor][bioc] +and hence requires the development version of R (>=3.4) and the development +version of Bioconductor (3.5). -If you have these installed Splatter can be installed from Github using +If you have these installed Splatter can be installed from Bioconductor using `biocLite`: ```{r} source("https://bioconductor.org/biocLite.R") -biocLite("Oshlack/splatter", dependencies=TRUE, build_vignettes=TRUE) +biocLite("splatter") ``` -This will also build the vignette and install suggested dependencies (which -aren't required for core functionality). Building the vignette may sometimes -fail when run from the command line, if this happens try running the install -command in RStudio. If you still have problems or want a quicker install -(without the vignette) use: +If you wish to build a local version of the vignette use: ```{r} -biocLite("Oshlack/splatter") +biocLite("splatter", build_vignettes=TRUE) ``` + +This will also build the vignette and install all suggested dependencies (which +aren't required for core functionality). Building the vignette may sometimes +fail when run from the command line, if this happens try running the install +command in RStudio. + ### Release version The last version of Splatter that is compatible with the current version of R (3.3) is v0.99.0 which can be downloaded [here][rel33]. 
There are no significant changes to functionality and this version should be fine for most -users until the next R/Bioconducor release. +users until the next R/Bioconductor release (in approximately April 2017). ## Getting started -Once installed the best place to get started is the vignette. Load Splatter, -then browse the vignettes: +Once installed the best place to get started is the vignette. For most users +the most convenient way to access this is online [here][vignette]. + +Alternatively, if you chose to build the vignette, you can load Splatter, then +browse the vignettes: ```{r} library(splatter) browseVignettes("splatter") ``` + This is a detailed document that introduces the main features of Splatter. [scater]: https://github.com/davismcc/scater [contrib]: https://github.com/Bioconductor/Contributions/issues/209 -[rel33]: https://github.com/Oshlack/splatter/releases/tag/v0.99.0 \ No newline at end of file +[bioc]: https://bioconductor.org/packages/devel/bioc/html/splatter.html +[vignette]: https://bioconductor.org/packages/devel/bioc/vignettes/splatter/inst/doc/splatter.html +[rel33]: https://github.com/Oshlack/splatter/releases/tag/v0.99.0 diff --git a/man/addFeatureStats.Rd b/man/addFeatureStats.Rd index cc2ec995385e4761db533ccab02343735d129f4e..8d454be2663c89d8ccada477845e84b212f36550 100644 --- a/man/addFeatureStats.Rd +++ b/man/addFeatureStats.Rd @@ -33,4 +33,3 @@ variation, median and median absolute deviation. Statistics are added to the \code{fData} slot and are named \code{stat_[log]_value_[no0]} where \code{log} and \code{no0} are added if those arguments are true.
} - diff --git a/man/addGeneLengths.Rd b/man/addGeneLengths.Rd new file mode 100644 index 0000000000000000000000000000000000000000..dd760ad84e2e0e1738c6c6f8142d9929b0c3cdfd --- /dev/null +++ b/man/addGeneLengths.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SCESet-functions.R +\name{addGeneLengths} +\alias{addGeneLengths} +\title{Add gene lengths} +\usage{ +addGeneLengths(sce, method = c("generate", "sample"), loc = 7.9, + scale = 0.7, lengths = NULL) +} +\arguments{ +\item{sce}{SCESet to add gene lengths to.} + +\item{method}{Method to use for creating lengths.} + +\item{loc}{Location parameter for the generate method.} + +\item{scale}{Scale parameter for the generate method.} + +\item{lengths}{Vector of lengths for the sample method.} +} +\value{ +SCESet with added gene lengths +} +\description{ +Add gene lengths to an SCESet object +} +\details{ +This function adds simulated gene lengths to the \code{fData} slot of an +\code{SCESet} object that can be used for calculating length normalised +expression values such as TPM or FPKM. The \code{generate} method simulates +lengths using a (rounded) log-normal distribution, with the default +\code{loc} and \code{scale} parameters based on human coding genes. +Alternatively the \code{sample} method can be used which randomly samples +lengths (with replacement) from a supplied vector.
+} +\examples{ +# Default generate method +sce <- simpleSimulate() +sce <- addGeneLengths(sce) +head(fData(sce)) +# Sample method (human coding genes) +\dontrun{ +library(TxDb.Hsapiens.UCSC.hg19.knownGene) +library(GenomicFeatures) +txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene +tx.lens <- transcriptLengths(txdb, with.cds_len = TRUE) +tx.lens <- tx.lens[tx.lens$cds_len > 0, ] +gene.lens <- max(splitAsList(tx.lens$tx_len, tx.lens$gene_id)) +sce <- addGeneLengths(sce, method = "sample", lengths = gene.lens) +} +} diff --git a/man/bridge.Rd b/man/bridge.Rd index b269c4af2a635bcda37ba3072ef584cc035431da..66277ed39ff4bdf24746b7483da1c453b4a251cd 100644 --- a/man/bridge.Rd +++ b/man/bridge.Rd @@ -24,4 +24,3 @@ Vector of length n following a path from x to y. Calculate a smoothed Brownian bridge between two points. A Brownian bridge is a random walk with fixed end points. } - diff --git a/man/compareSCESets.Rd b/man/compareSCESets.Rd index 99069e20f78e1de8183dcd0a6aa3aae5b57f0500..b66c525475e8d11a35569dee1615c2226f1d8caa 100644 --- a/man/compareSCESets.Rd +++ b/man/compareSCESets.Rd @@ -52,4 +52,3 @@ comparison <- compareSCESets(list(Splat = sim1, Simple = sim2)) names(comparison) names(comparison$Plots) } - diff --git a/man/expandParams.Rd b/man/expandParams.Rd index 9d2cd938b40ae5e3af2d080344b6bdd08032f394..5d54fc3e122f3566cf5698fde2e0e9a7a724f529 100644 --- a/man/expandParams.Rd +++ b/man/expandParams.Rd @@ -1,5 +1,6 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/LunParams-methods.R, R/SplatParams-methods.R +% Please edit documentation in R/AllGenerics.R, R/LunParams-methods.R, +% R/SplatParams-methods.R \docType{methods} \name{expandParams} \alias{expandParams} @@ -25,4 +26,3 @@ Expanded object. Expand the parameters that can be vectors so that they are the same length as the number of groups. 
} - diff --git a/man/getLNormFactors.Rd b/man/getLNormFactors.Rd index 5da9ff6a8403bf6a69bfc58ca825e840c6365539..75533172a5f75917bb39cca676d3016d1ba4d3f7 100644 --- a/man/getLNormFactors.Rd +++ b/man/getLNormFactors.Rd @@ -24,4 +24,3 @@ Vector containing generated factors. \description{ Randomly generate multiplication factors from a log-normal distribution. } - diff --git a/man/getParam.Rd b/man/getParam.Rd index 346f0dc23e15ec2cb82904bd9c4b613ec4bc590e..d73b72864a0d39b4b4ff967bcce6212cc1481bbc 100644 --- a/man/getParam.Rd +++ b/man/getParam.Rd @@ -26,4 +26,3 @@ params <- newSimpleParams() getParam(params, "nGenes") } - diff --git a/man/getParams.Rd b/man/getParams.Rd index e9a81af064eeeeb6261cad07c3df7b86d3688f94..cb5d3ba90e25ec98be9b7dfe9c034d25da4c80ec 100644 --- a/man/getParams.Rd +++ b/man/getParams.Rd @@ -21,4 +21,3 @@ Get multiple parameter values from a Params object. params <- newSimpleParams() getParams(params, c("nGenes", "nCells", "mean.rate")) } - diff --git a/man/getPathOrder.Rd b/man/getPathOrder.Rd index d59ecfb788c32e37b8aee7fe96b2016a616b5a25..7d125e77ef9f75165b78d11c626f28a22efe13cd 100644 --- a/man/getPathOrder.Rd +++ b/man/getPathOrder.Rd @@ -17,4 +17,3 @@ Vector giving the order to process paths in. Identify the correct order to process paths so that preceding paths have already been simulated. } - diff --git a/man/listSims.Rd b/man/listSims.Rd index de52c43d61274921154a4a82ddde0b38348f06d4..90edc200c18f1fe2565dc2f0e30be9f0226c8ef0 100644 --- a/man/listSims.Rd +++ b/man/listSims.Rd @@ -20,4 +20,3 @@ brief description. 
\examples{ listSims() } - diff --git a/man/logistic.Rd b/man/logistic.Rd index 1cc234ca078933c870318c60a50643b7cfaf37ad..a98711efe8c53579679313ccbefae629e03ed0ab 100644 --- a/man/logistic.Rd +++ b/man/logistic.Rd @@ -19,4 +19,3 @@ Value of logistic funciton with given parameters \description{ Implementation of the logistic function } - diff --git a/man/lun2Estimate.Rd b/man/lun2Estimate.Rd index f466ef76ddcd65f8226dc58647c1f46e6a1be5c6..f63273db64cf9e71fca304f6276b1530724caed8 100644 --- a/man/lun2Estimate.Rd +++ b/man/lun2Estimate.Rd @@ -46,4 +46,3 @@ params <- lun2Estimate(sc_example_counts, plates, min.size = 20) params } } - diff --git a/man/lun2Simulate.Rd b/man/lun2Simulate.Rd index e93d83581f85e8f880b41f6b2f1214c542cd9eaa..cd48f05ea48f44b93e348a046044c7724d17a48a 100644 --- a/man/lun2Simulate.Rd +++ b/man/lun2Simulate.Rd @@ -44,4 +44,3 @@ Paper: \url{dx.doi.org/10.1101/073973} Code: \url{https://github.com/MarioniLab/PlateEffects2016} } - diff --git a/man/lunEstimate.Rd b/man/lunEstimate.Rd index ac2fde8e7bb8fb467d9eaf772def4fd073ac5250..8ee55bea76967ecf9c7fa9a895178fb7950d3c97 100644 --- a/man/lunEstimate.Rd +++ b/man/lunEstimate.Rd @@ -34,4 +34,3 @@ data("sc_example_counts") params <- lunEstimate(sc_example_counts) params } - diff --git a/man/lunSimulate.Rd b/man/lunSimulate.Rd index 4e02c74400ba2f4d62d0b533f0405563bdc7082e..4ccd94ac6181f50aef6f0e710698b88810ba9b51 100644 --- a/man/lunSimulate.Rd +++ b/man/lunSimulate.Rd @@ -45,4 +45,3 @@ Paper: \url{dx.doi.org/10.1186/s13059-016-0947-7} Code: \url{https://github.com/MarioniLab/Deconvolution2016} } - diff --git a/man/newParams.Rd b/man/newParams.Rd index 3cac16ace225cbe82264fee3c57a3e7725a25d96..197dc0204454f31bf20343896156205931392de4 100644 --- a/man/newParams.Rd +++ b/man/newParams.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, R/LunParams-methods.R, R/SCDDParams-methods.R, R/SimpleParams-methods.R, 
R/SplatParams-methods.R +% Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, +% R/LunParams-methods.R, R/SCDDParams-methods.R, R/SimpleParams-methods.R, +% R/SplatParams-methods.R \name{newParams} +\alias{newParams} \alias{newLun2Params} \alias{newLunParams} -\alias{newParams} \alias{newSCDDParams} \alias{newSimpleParams} \alias{newSplatParams} @@ -34,4 +36,3 @@ params <- newSimpleParams() params <- newSimpleParams(nGenes = 200, nCells = 10) } - diff --git a/man/rbindMatched.Rd b/man/rbindMatched.Rd index a0228b70c4318a98e121d81a560c252868c64bc5..a15d596575e3fd5a124de7c761cf83b8f63c2918 100644 --- a/man/rbindMatched.Rd +++ b/man/rbindMatched.Rd @@ -19,4 +19,3 @@ data.frame containing rows from \code{df1} and \code{df2} but only Bind the rows of two data frames, keeping only the columns that are common to both. } - diff --git a/man/scDDEstimate.Rd b/man/scDDEstimate.Rd index 8e470de6c3440063fdbd5337405386ae30e0c2bc..c0e309492a663c8119c76e1e4bdb28f52098c299 100644 --- a/man/scDDEstimate.Rd +++ b/man/scDDEstimate.Rd @@ -38,4 +38,3 @@ conditions <- sample(1:2, ncol(sc_example_counts), replace = TRUE) params <- scDDEstimate(sc_example_counts, conditions) params } - diff --git a/man/scDDSimulate.Rd b/man/scDDSimulate.Rd index a5d5db1df0e93a93467ec7c97de3e1fab059ce34..a1fe862025bdb20d13a6eb6b638e3c41c967bcf0 100644 --- a/man/scDDSimulate.Rd +++ b/man/scDDSimulate.Rd @@ -38,12 +38,11 @@ sim <- scDDSimulate() } } \references{ -Korthauer KD, Chu L-F, Newton MA, Li Y, Thomson J, Stewart R, et al. scDD: A +Korthauer KD, Chu L-F, Newton MA, Li Y, Thomson J, Stewart R, et al. A statistical approach for identifying differential distributions in -single-cell RNA-seq experiments. bioRxiv (2015). +single-cell RNA-seq experiments. Genome Biology (2016). 
-Paper: \url{dx.doi.org/10.1101/035501} +Paper: \url{https://doi.org/10.1186/s13059-016-1077-y} Code: \url{https://github.com/kdkorthauer/scDD} } - diff --git a/man/setParam.Rd b/man/setParam.Rd index 559a1547c6d5c82b25bf2b78a8b86e14c254b585..9c1ba5341613eb970da5a70d097a52fc521d0f60 100644 --- a/man/setParam.Rd +++ b/man/setParam.Rd @@ -1,5 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, R/LunParams-methods.R, R/Params-methods.R, R/SCDDParams-methods.R, R/SplatParams-methods.R +% Please edit documentation in R/AllGenerics.R, R/Lun2Params-methods.R, +% R/LunParams-methods.R, R/Params-methods.R, R/SCDDParams-methods.R, +% R/SplatParams-methods.R \docType{methods} \name{setParam} \alias{setParam} @@ -40,4 +42,3 @@ params <- newSimpleParams() setParam(params, "nGenes", 100) } - diff --git a/man/setParamUnchecked.Rd b/man/setParamUnchecked.Rd index d25ccffac246b301adb251737acfbb9c36bb10e4..e8d1b40b4ab543c8b2ca227cf9a935ab5f035210 100644 --- a/man/setParamUnchecked.Rd +++ b/man/setParamUnchecked.Rd @@ -24,4 +24,3 @@ Object with new parameter value. Function for setting parameter values. THE OUTPUT IS NOT CHECKED FOR VALIDITY! } - diff --git a/man/setParams.Rd b/man/setParams.Rd index a3843db3419a28fcf626184c8a47131e608a8b9d..d8accc7d1e2933530dc1b12e7432c0a29d165ffe 100644 --- a/man/setParams.Rd +++ b/man/setParams.Rd @@ -38,4 +38,3 @@ params params <- setParams(params, list(mean.rate = 0.2, mean.shape = 0.8)) params } - diff --git a/man/setParamsUnchecked.Rd b/man/setParamsUnchecked.Rd index a99d38f8a1b007a1c34d075084374289c362512d..b49f60068c5df5f59af89d78c10ce36469ed887b 100644 --- a/man/setParamsUnchecked.Rd +++ b/man/setParamsUnchecked.Rd @@ -28,4 +28,3 @@ Parameters can be specified using a list via \code{update} (useful when collecting parameter values in some way) or individually (useful when setting them manually), see examples. THE FINAL OBJECT IS NOT CHECKED FOR VALIDITY!
} - diff --git a/man/showPP.Rd b/man/showPP.Rd index f97a071b10d6f766964a4c4f07305d942880531f..7adaaa299c13d904e5d5bcb64994eace3102495d 100644 --- a/man/showPP.Rd +++ b/man/showPP.Rd @@ -17,4 +17,3 @@ Print params object to console \description{ Function used for pretty printing params object. } - diff --git a/man/simpleEstimate.Rd b/man/simpleEstimate.Rd index 8e5fb5248215f9eab452972974c7d028e220749e..3be8654a88d7e163a2241a42abd7be00b66c041e 100644 --- a/man/simpleEstimate.Rd +++ b/man/simpleEstimate.Rd @@ -36,4 +36,3 @@ data("sc_example_counts") params <- simpleEstimate(sc_example_counts) params } - diff --git a/man/simpleSimulate.Rd b/man/simpleSimulate.Rd index eeef7ee5683f54de393f2505ea030e0703f2abe8..b2f5fda8c2bb960446c0cc808a457c305956f1d0 100644 --- a/man/simpleSimulate.Rd +++ b/man/simpleSimulate.Rd @@ -33,4 +33,3 @@ sim <- simpleSimulate() # Override default parameters sim <- simpleSimulate(nGenes = 1000, nCells = 50) } - diff --git a/man/splatEstBCV.Rd b/man/splatEstBCV.Rd index 0777efa13966a7941d059f810701346391de9988..9b66b1706a9ec2e31aaae4515c704c1a34bc668d 100644 --- a/man/splatEstBCV.Rd +++ b/man/splatEstBCV.Rd @@ -19,4 +19,3 @@ Parameters are estimated using the \code{estimateDisp} function in the \code{edgeR} package. Specifically the common dispersion and prior degrees of freedom. See \code{\link{estimateDisp}} for details. } - diff --git a/man/splatEstDropout.Rd b/man/splatEstDropout.Rd index ccd52157c78f4af2a60c98586d33cdc905f5ae97..669466993d3587dc8d840807483b366f6d7062aa 100644 --- a/man/splatEstDropout.Rd +++ b/man/splatEstDropout.Rd @@ -34,4 +34,3 @@ but should give a reasonable indication. A more accurate approach is to look at a plot of log2 mean expression vs the difference between observed and expected number of zeros across all genes. 
} - diff --git a/man/splatEstLib.Rd b/man/splatEstLib.Rd index 0556e5e0818141e7ba43840c2993dae2dd658c1b..73a4b0d6b4e33f46762fea21f4a92feb40c2be4e 100644 --- a/man/splatEstLib.Rd +++ b/man/splatEstLib.Rd @@ -19,4 +19,3 @@ A log-normal distribution is fitted to the library sizes and the estimated parameters are added to the params object. See \code{\link[fitdistrplus]{fitdist}} for details on the fitting. } - diff --git a/man/splatEstMean.Rd b/man/splatEstMean.Rd index e5d12e72fb058cf665f6ba41de37de6a5021c7e5..e0892f9308585edab292d8eac1d38dbb1d5be812 100644 --- a/man/splatEstMean.Rd +++ b/man/splatEstMean.Rd @@ -19,4 +19,3 @@ Estimate rate and shape parameters for the gamma distribution used to simulate gene expression means using the 'moment matching estimation' method of \code{\link[fitdistrplus]{fitdist}}. } - diff --git a/man/splatEstOutlier.Rd b/man/splatEstOutlier.Rd index 60a39ec0c81db1efe43af1c83ddfae8a0361a0af..81e56853061557d9b4618004bd8a570a53eb0575 100644 --- a/man/splatEstOutlier.Rd +++ b/man/splatEstOutlier.Rd @@ -31,4 +31,3 @@ factors in order to estimate the outlier factor location and scale parameters. See \code{\link[fitdistrplus]{fitdist}} for details on the fitting. } - diff --git a/man/splatEstimate.Rd b/man/splatEstimate.Rd index 434b1eb823f31424fbead45629708c98befc1d61..f698fd93e1c57409d5f4317350c05b75bb1554c1 100644 --- a/man/splatEstimate.Rd +++ b/man/splatEstimate.Rd @@ -36,4 +36,3 @@ params \code{\link{splatEstOutlier}}, \code{\link{splatEstBCV}}, \code{\link{splatEstDropout}} } - diff --git a/man/splatSimBCVMeans.Rd b/man/splatSimBCVMeans.Rd index 08c7c28c8f8a9bcb5ae330c3bc012aab6fa426a0..315b5ae7c733d54c5f468d38679fcd3491d8801c 100644 --- a/man/splatSimBCVMeans.Rd +++ b/man/splatSimBCVMeans.Rd @@ -19,4 +19,3 @@ Simulate means for each gene in each cell that are adjusted to follow a mean-variance trend using Biological Coefficient of Variation taken from and inverse gamma distribution. 
} - diff --git a/man/splatSimCellMeans.Rd b/man/splatSimCellMeans.Rd index 698aa62f9854bcb88849c18be5e76f1cf3c71480..436fd6fe380941b7c750c94fb67912df304de65d 100644 --- a/man/splatSimCellMeans.Rd +++ b/man/splatSimCellMeans.Rd @@ -2,9 +2,9 @@ % Please edit documentation in R/splat-simulate.R \name{splatSimCellMeans} \alias{splatSimCellMeans} +\alias{splatSimSingleCellMeans} \alias{splatSimGroupCellMeans} \alias{splatSimPathCellMeans} -\alias{splatSimSingleCellMeans} \title{Simulate cell means} \usage{ splatSimSingleCellMeans(sim, params) @@ -28,4 +28,3 @@ each cell. Cells start with the mean expression for the group they belong to random position on the appropriate path (when simulating paths). The selected means are adjusted for each cell's expected library size. } - diff --git a/man/splatSimDE.Rd b/man/splatSimDE.Rd index b1729d94157938f5f83d21dff62a13bba5a89455..7af1729ba294d3f36deba65f8ec6aa96a7539584 100644 --- a/man/splatSimDE.Rd +++ b/man/splatSimDE.Rd @@ -24,4 +24,3 @@ group are produced using \code{\link{getLNormFactors}} and these are added along with updated means for each group. For paths care is taked to make sure they are simualated in the correct order. } - diff --git a/man/splatSimDropout.Rd b/man/splatSimDropout.Rd index 2fe71cbda53bbe709c594ee5888b343a7bb68e0a..166336c8c317a19ffbdebeac8e4016ebcf2336c0 100644 --- a/man/splatSimDropout.Rd +++ b/man/splatSimDropout.Rd @@ -20,4 +20,3 @@ level of a gene and the probability of dropout, giving a probability for each gene in each cell. These probabilities are used in a Bernoulli distribution to decide which counts should be dropped. } - diff --git a/man/splatSimGeneMeans.Rd b/man/splatSimGeneMeans.Rd index 531f2b7bcb45854af67b705005b1925d03787d39..7d738db8f2efa32e5f5f3f796c732f3cd612b8f0 100644 --- a/man/splatSimGeneMeans.Rd +++ b/man/splatSimGeneMeans.Rd @@ -19,4 +19,3 @@ Simulate gene means from a gamma distribution. Also simulates outlier expression factors. 
Genes with an outlier factor not equal to 1 are replaced with the median mean expression multiplied by the outlier factor. } - diff --git a/man/splatSimLibSizes.Rd b/man/splatSimLibSizes.Rd index 4903d21f6de9dcf4fe2567414396e596fd41a0df..4785021b318feedef285c865cc071497286db40e 100644 --- a/man/splatSimLibSizes.Rd +++ b/man/splatSimLibSizes.Rd @@ -17,4 +17,3 @@ SCESet with simulated library sizes. \description{ Simulate expected library sizes from a log-normal distribution } - diff --git a/man/splatSimTrueCounts.Rd b/man/splatSimTrueCounts.Rd index 7ba61a7af23b2e6063b19a947c4f6c08cd04de87..4fc2464f381bb14a4a20a87f80e36bf765a5e2a8 100644 --- a/man/splatSimTrueCounts.Rd +++ b/man/splatSimTrueCounts.Rd @@ -19,4 +19,3 @@ Simulate a true counts matrix. Counts are simulated from a poisson distribution where Each gene in each cell has it's own mean based on the group (or path position), expected library size and BCV. } - diff --git a/man/splatSimulate.Rd b/man/splatSimulate.Rd index d65b54a2c841d5243d749f70fa0db9f32b8bb526..641fc7aa4ae4ca6a22937917b9fb85f015200dce 100644 --- a/man/splatSimulate.Rd +++ b/man/splatSimulate.Rd @@ -2,9 +2,9 @@ % Please edit documentation in R/splat-simulate.R \name{splatSimulate} \alias{splatSimulate} +\alias{splatSimulateSingle} \alias{splatSimulateGroups} \alias{splatSimulatePaths} -\alias{splatSimulateSingle} \title{Splat simulation} \usage{ splatSimulate(params = newSplatParams(), method = c("single", "groups", @@ -132,4 +132,3 @@ sim <- splatSimulate(method = "paths") \code{\link{splatSimBCVMeans}}, \code{\link{splatSimTrueCounts}}, \code{\link{splatSimDropout}} } - diff --git a/man/splatter.Rd b/man/splatter.Rd index beda507ac876862efc735170ab539478d45d29fb..b36d0b6d9bc738daaab5dfee9321be1ee19dd529 100644 --- a/man/splatter.Rd +++ b/man/splatter.Rd @@ -13,4 +13,3 @@ simulation of single-cell RNA-seq count data. As well as it's own simulation model \pkg{splatter} provides functions for the estimation of model parameters. 
} - diff --git a/tests/testthat/test-SCESet-functions.R b/tests/testthat/test-SCESet-functions.R new file mode 100644 index 0000000000000000000000000000000000000000..cfbac01472fc1397bc3b5a2d47ecce65b30e0c29 --- /dev/null +++ b/tests/testthat/test-SCESet-functions.R @@ -0,0 +1,19 @@ +context("SCESet functions") + +test_that("addGeneLengths generate method works", { + sce <- simpleSimulate() + expect_silent(addGeneLengths(sce)) + expect_error(addGeneLengths("a")) + expect_error(addGeneLengths(sce, loc = "a")) + expect_error(addGeneLengths(sce, scale = "a")) + expect_error(addGeneLengths(sce, scale = -1)) +}) + +test_that("addGeneLength sample method works", { + sce <- simpleSimulate() + lens <- round(runif(100, 100, 10000)) + expect_silent(addGeneLengths(sce, method = "sample", lengths = lens)) + expect_error(addGeneLengths(sce, method = "sample")) + expect_error(addGeneLengths(sce, method = "sample", lengths = -1)) + expect_error(addGeneLengths(sce, method = "sample", lengths = "a")) +}) diff --git a/vignettes/splatter.Rmd b/vignettes/splatter.Rmd index d170b1e019052a5f9546efcbe4855173f16974e1..8c1a8152b67529fda8f91979425c21a05df4cb57 100644 --- a/vignettes/splatter.Rmd +++ b/vignettes/splatter.Rmd @@ -388,6 +388,34 @@ detailed information on each simulation see the appropriate help page (eg. `?lun2Estimate` for details of how the Lun 2 simulation estimates parameters) or refer to the appropriate paper or package. +# Other expression values + +Splatter is designed to simulate count data but some analysis methods expect +other expression values, particularly length-normalised values such as TPM or +FPKM. The `scater` package has functions for adding these values to an `SCESet` +object but they require a length for each gene.
The `addGeneLengths` function +can be used to simulate these lengths: + +```{r lengths} +sim <- simpleSimulate(verbose = FALSE) +sim <- addGeneLengths(sim) +head(fData(sim)) +``` + +We can then use `scater` to calculate TPM: + +```{r TPM} +tpm(sim) <- calculateTPM(sim, fData(sim)$Length) +tpm(sim)[1:5, 1:5] +``` + +The default method used by `addGeneLengths` to simulate lengths is to generate +values from a log-normal distribution which are then rounded to give an integer +length. The parameters for this distribution are based on human coding genes +but can be adjusted if needed (for example for other species). Alternatively +lengths can be sampled from a provided vector (see `?addGeneLengths` for details +and an example). + # Comparing simulations and real data One thing you might like to do after simulating data is to compare it to a real @@ -434,4 +462,4 @@ sessionInfo() [gamma]: https://en.wikipedia.org/wiki/Gamma_distribution [poisson]: https://en.wikipedia.org/wiki/Poisson_distribution -[scater-vignette]: https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/vignette.html \ No newline at end of file +[scater-vignette]: https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/vignette.html