From 4ff20b7440e2c81915e9551ae8fa341f8d1e49d3 Mon Sep 17 00:00:00 2001 From: Luke Zappia <lazappi@users.noreply.github.com> Date: Mon, 15 Apr 2019 15:28:58 +1000 Subject: [PATCH] Minor changes to documentation Fixes #64 Also remove lingering references to phenoData and featureData --- R/AllClasses.R | 2 +- R/compare.R | 89 ++++++++++++++++++----------------- R/splat-simulate.R | 6 +-- man/SplatParams.Rd | 2 +- man/compareSCEs.Rd | 4 +- man/diffSCEs.Rd | 4 +- man/splatSimulate.Rd | 6 +-- tests/testthat/test-compare.R | 12 ++--- vignettes/splatter.Rmd | 12 ++--- 9 files changed, 71 insertions(+), 66 deletions(-) diff --git a/R/AllClasses.R b/R/AllClasses.R index dff138a..eb6a414 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -126,7 +126,7 @@ setClass("SimpleParams", #' \describe{ #' \item{\code{[de.prob]}}{Probability that a gene is differentially #' expressed in a group. Can be a vector.} -#' \item{\code{[de.loProb]}}{Probability that a differentially +#' \item{\code{[de.downProb]}}{Probability that a differentially #' expressed gene is down-regulated. Can be a vector.} #' \item{\code{[de.facLoc]}}{Location (meanlog) parameter for the #' differential expression factor log-normal distribution. Can be a diff --git a/R/compare.R b/R/compare.R index 3743e64..88fede5 100644 --- a/R/compare.R +++ b/R/compare.R @@ -14,9 +14,9 @@ #' The returned list has three items: #' #' \describe{ -#' \item{\code{FeatureData}}{Combined feature data from the provided +#' \item{\code{RowData}}{Combined row data from the provided #' SingleCellExperiments.} -#' \item{\code{PhenoData}}{Combined pheno data from the provided +#' \item{\code{ColData}}{Combined column data from the provided #' SingleCellExperiments.} #' \item{\code{Plots}}{Comparison plots #' \describe{ @@ -177,8 +177,8 @@ compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1, mean.zeros <- mean.zeros + geom_smooth() } - comparison <- list(FeatureData = features, - PhenoData = cells, + comparison <- list(RowData = features, + ColData = cells, Plots = list(Means = means, Variances = vars, MeanVar = mean.var, @@ -217,9 +217,9 @@ compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1, #' #' \describe{ #' \item{\code{Reference}}{The SingleCellExperiment used as the reference.} -#' \item{\code{FeatureData}}{Combined feature data from the provided +#' \item{\code{RowData}}{Combined feature data from the provided #' SingleCellExperiments.} -#' \item{\code{PhenoData}}{Combined pheno data from the provided +#' \item{\code{ColData}}{Combined column data from the provided #' SingleCellExperiments.} #' \item{\code{Plots}}{Difference plots #' \describe{ @@ -509,8 +509,8 @@ diffSCEs <- function(sces, ref, point.size = 0.1, point.alpha = 0.1, } comparison <- list(Reference = ref.sce, - FeatureData = features, - PhenoData = cells, + RowData = features, + ColData = cells, Plots = list(Means = means, Variances = vars, MeanVar = mean.var, @@ -824,10 +824,10 @@ makeOverallPanel <- function(comp, diff, title = "Overall comparison", #' @export summariseDiff <- function(diff) { - datasets <- unique(diff$PhenoData$Dataset) + datasets <- unique(diff$ColData$Dataset) - fData.mads <- sapply(datasets, function(dataset) { - df <- diff$FeatureData[diff$FeatureData$Dataset == dataset, ] + rowData.mads <- sapply(datasets, function(dataset) { + df <- diff$RowData[diff$RowData$Dataset == dataset, ] mean <- median(abs(df$RankDiffMeanLogCPM)) var <- median(abs(df$RankDiffVarLogCPM)) zeros <- median(abs(df$RankDiffZeros)) @@ -836,27 +836,29 @@ summariseDiff <- function(diff) { return(c(Mean = mean, Variance = var, ZerosGene = zeros, MeanVar = mean.var, MeanZeros = mean.zeros)) }) - fData.mads.z <- t(scale(t(fData.mads))) + rowData.mads.z <- t(scale(t(rowData.mads))) - pData.mads <- sapply(datasets, function(dataset) { - df <- diff$PhenoData[diff$PhenoData$Dataset == dataset, ] + colData.mads <- sapply(datasets, function(dataset) { + df <- diff$ColData[diff$ColData$Dataset == dataset, ] lib.size <- median(abs(df$RankDiffLibSize)) zeros <- median(abs(df$RankDiffZeros)) return(c(LibSize = lib.size, ZerosCell = zeros)) }) - pData.mads.z <- t(scale(t(pData.mads))) + colData.mads.z <- t(scale(t(colData.mads))) - mads <- data.frame(Dataset = datasets, t(fData.mads), t(pData.mads)) - mads.z <- data.frame(Dataset = datasets, t(fData.mads.z), t(pData.mads.z)) + mads <- data.frame(Dataset = datasets, t(rowData.mads), t(colData.mads)) + mads.z <- data.frame(Dataset = datasets, t(rowData.mads.z), + t(colData.mads.z)) - fData.ranks <- matrixStats::rowRanks(fData.mads) - pData.ranks <- matrixStats::rowRanks(pData.mads) + rowData.ranks <- matrixStats::rowRanks(rowData.mads) + colData.ranks <- matrixStats::rowRanks(colData.mads) - ranks.mads <- data.frame(Dataset = datasets, t(fData.ranks), t(pData.ranks)) + ranks.mads <- data.frame(Dataset = datasets, t(rowData.ranks), + t(colData.ranks)) colnames(ranks.mads) <- paste0(colnames(mads), "Rank") - fData.maes <- sapply(datasets, function(dataset) { - df <- diff$FeatureData[diff$FeatureData$Dataset == dataset, ] + rowData.maes <- sapply(datasets, function(dataset) { + df <- diff$RowData[diff$RowData$Dataset == dataset, ] mean <- mean(abs(df$RankDiffMeanLogCPM)) var <- mean(abs(df$RankDiffVarLogCPM)) zeros <- mean(abs(df$RankDiffZeros)) @@ -865,27 +867,28 @@ summariseDiff <- function(diff) { return(c(Mean = mean, Variance = var, ZerosGene = zeros, MeanVar = mean.var, MeanZeros = mean.zeros)) }) - fData.maes.z <- t(scale(t(fData.maes))) + rowData.maes.z <- t(scale(t(rowData.maes))) - pData.maes <- sapply(datasets, function(dataset) { - df <- diff$PhenoData[diff$PhenoData$Dataset == dataset, ] + colData.maes <- sapply(datasets, function(dataset) { + df <- diff$ColData[diff$ColData$Dataset == dataset, ] lib.size <- mean(abs(df$RankDiffLibSize)) zeros <- mean(abs(df$RankDiffZeros)) return(c(LibSize = lib.size, ZerosCell = zeros)) }) - pData.maes.z <- t(scale(t(pData.maes))) + colData.maes.z <- t(scale(t(colData.maes))) - maes <- data.frame(Dataset = datasets, t(fData.maes), t(pData.maes)) - maes.z <- data.frame(Dataset = datasets, t(fData.maes.z), t(pData.maes.z)) + maes <- data.frame(Dataset = datasets, t(rowData.maes), t(colData.maes)) + maes.z <- data.frame(Dataset = datasets, t(rowData.maes.z), + t(colData.maes.z)) - fData.ranks <- matrixStats::rowRanks(fData.maes) - pData.ranks <- matrixStats::rowRanks(pData.maes) + rowData.ranks <- matrixStats::rowRanks(rowData.maes) + colData.ranks <- matrixStats::rowRanks(colData.maes) - ranks.maes <- data.frame(Dataset = datasets, t(fData.ranks), t(pData.ranks)) + ranks.maes <- data.frame(Dataset = datasets, t(rowData.ranks), t(colData.ranks)) colnames(ranks.maes) <- paste0(colnames(mads), "Rank") - fData.rmse <- sapply(datasets, function(dataset) { - df <- diff$FeatureData[diff$FeatureData$Dataset == dataset, ] + rowData.rmse <- sapply(datasets, function(dataset) { + df <- diff$RowData[diff$RowData$Dataset == dataset, ] mean <- sqrt(mean(df$RankDiffMeanLogCPM ^ 2)) var <- sqrt(mean(df$RankDiffVarLogCPM ^ 2)) zeros <- sqrt(mean(df$RankDiffZeros ^ 2)) @@ -894,23 +897,25 @@ summariseDiff <- function(diff) { return(c(Mean = mean, Variance = var, ZerosGene = zeros, MeanVar = mean.var, MeanZeros = mean.zeros)) }) - fData.rmse.z <- t(scale(t(fData.rmse))) + rowData.rmse.z <- t(scale(t(rowData.rmse))) - pData.rmse <- sapply(datasets, function(dataset) { - df <- diff$PhenoData[diff$PhenoData$Dataset == dataset, ] + colData.rmse <- sapply(datasets, function(dataset) { + df <- diff$ColData[diff$ColData$Dataset == dataset, ] lib.size <- sqrt(mean(df$RankDiffLibSize ^ 2)) zeros <- sqrt(mean(df$RankDiffZeros ^ 2)) return(c(LibSize = lib.size, ZerosCell = zeros)) }) - pData.rmse.z <- t(scale(t(pData.rmse))) + colData.rmse.z <- t(scale(t(colData.rmse))) - rmse <- data.frame(Dataset = datasets, t(fData.rmse), t(pData.rmse)) - rmse.z <- data.frame(Dataset = datasets, t(fData.rmse.z), t(pData.rmse.z)) + rmse <- data.frame(Dataset = datasets, t(rowData.rmse), t(colData.rmse)) + rmse.z <- data.frame(Dataset = datasets, t(rowData.rmse.z), + t(colData.rmse.z)) - fData.ranks <- matrixStats::rowRanks(fData.rmse) - pData.ranks <- matrixStats::rowRanks(pData.rmse) + rowData.ranks <- matrixStats::rowRanks(rowData.rmse) + colData.ranks <- matrixStats::rowRanks(colData.rmse) - ranks.rmse <- data.frame(Dataset = datasets, t(fData.ranks), t(pData.ranks)) + ranks.rmse <- data.frame(Dataset = datasets, t(rowData.ranks), + t(colData.ranks)) colnames(ranks.rmse) <- paste0(colnames(rmse), "Rank") mads <- stats::reshape(mads, varying = 2:8, direction = "long", diff --git a/R/splat-simulate.R b/R/splat-simulate.R index fbf9ff1..6c9a0e7 100644 --- a/R/splat-simulate.R +++ b/R/splat-simulate.R @@ -42,7 +42,7 @@ #' \code{\link{assays}} (for gene by cell matrices) slots. This additional #' information includes: #' \describe{ -#' \item{\code{phenoData}}{ +#' \item{\code{colData}}{ #' \describe{ #' \item{Cell}{Unique cell identifier.} #' \item{Group}{The group or path the cell belongs to.} @@ -50,7 +50,7 @@ #' \item{Step (paths only)}{how far along the path each cell is.} #' } #' } -#' \item{\code{featureData}}{ +#' \item{\code{rowData}}{ #' \describe{ #' \item{Gene}{Unique gene identifier.} #' \item{BaseGeneMean}{The base expression level for that gene.} @@ -66,7 +66,7 @@ #' non-linear changes in expression along a path.} #' } #' } -#' \item{\code{assayData}}{ +#' \item{\code{assays}}{ #' \describe{ #' \item{BatchCellMeans}{The mean expression of genes in each cell #' after adding batch effects.} diff --git a/man/SplatParams.Rd b/man/SplatParams.Rd index fbd581d..23b089c 100644 --- a/man/SplatParams.Rd +++ b/man/SplatParams.Rd @@ -70,7 +70,7 @@ The Splatter simulation requires the following parameters: \describe{ \item{\code{[de.prob]}}{Probability that a gene is differentially expressed in a group. Can be a vector.} - \item{\code{[de.loProb]}}{Probability that a differentially + \item{\code{[de.downProb]}}{Probability that a differentially expressed gene is down-regulated. Can be a vector.} \item{\code{[de.facLoc]}}{Location (meanlog) parameter for the differential expression factor log-normal distribution. Can be a diff --git a/man/compareSCEs.Rd b/man/compareSCEs.Rd index 711a2be..f0408d0 100644 --- a/man/compareSCEs.Rd +++ b/man/compareSCEs.Rd @@ -30,9 +30,9 @@ basic plots comparing them. The returned list has three items: \describe{ - \item{\code{FeatureData}}{Combined feature data from the provided + \item{\code{RowData}}{Combined row data from the provided SingleCellExperiments.} - \item{\code{PhenoData}}{Combined pheno data from the provided + \item{\code{ColData}}{Combined column data from the provided SingleCellExperiments.} \item{\code{Plots}}{Comparison plots \describe{ diff --git a/man/diffSCEs.Rd b/man/diffSCEs.Rd index a24a5c2..3e75cb6 100644 --- a/man/diffSCEs.Rd +++ b/man/diffSCEs.Rd @@ -41,9 +41,9 @@ The returned list has five items: \describe{ \item{\code{Reference}}{The SingleCellExperiment used as the reference.} - \item{\code{FeatureData}}{Combined feature data from the provided + \item{\code{RowData}}{Combined feature data from the provided SingleCellExperiments.} - \item{\code{PhenoData}}{Combined pheno data from the provided + \item{\code{ColData}}{Combined column data from the provided SingleCellExperiments.} \item{\code{Plots}}{Difference plots \describe{ diff --git a/man/splatSimulate.Rd b/man/splatSimulate.Rd index 5385187..578477f 100644 --- a/man/splatSimulate.Rd +++ b/man/splatSimulate.Rd @@ -67,7 +67,7 @@ information), \code{\link{rowData}} (for gene specific information) or \code{\link{assays}} (for gene by cell matrices) slots. This additional information includes: \describe{ - \item{\code{phenoData}}{ + \item{\code{colData}}{ \describe{ \item{Cell}{Unique cell identifier.} \item{Group}{The group or path the cell belongs to.} @@ -75,7 +75,7 @@ information includes: \item{Step (paths only)}{how far along the path each cell is.} } } - \item{\code{featureData}}{ + \item{\code{rowData}}{ \describe{ \item{Gene}{Unique gene identifier.} \item{BaseGeneMean}{The base expression level for that gene.} @@ -91,7 +91,7 @@ information includes: non-linear changes in expression along a path.} } } - \item{\code{assayData}}{ + \item{\code{assays}}{ \describe{ \item{BatchCellMeans}{The mean expression of genes in each cell after adding batch effects.} diff --git a/tests/testthat/test-compare.R b/tests/testthat/test-compare.R index 04d6520..29540df 100644 --- a/tests/testthat/test-compare.R +++ b/tests/testthat/test-compare.R @@ -8,10 +8,10 @@ difference <- diffSCEs(list(Splat = sim1, Simple = sim2), ref = "Simple") test_that("compareSCEs works", { expect_length(comparison, 3) - expect_true(all(c("FeatureData", "PhenoData", "Plots") %in% + expect_true(all(c("RowData", "ColData", "Plots") %in% names(comparison))) - checkmate::expect_class(comparison$PhenoData, "data.frame") - checkmate::expect_class(comparison$FeatureData, "data.frame") + checkmate::expect_class(comparison$ColData, "data.frame") + checkmate::expect_class(comparison$RowData, "data.frame") expect_length(comparison$Plots, 7) expect_true(all(c("Means", "Variances", "MeanVar", "LibrarySizes", "ZerosGene", "ZerosCell", "MeanZeros") %in% @@ -23,11 +23,11 @@ test_that("compareSCEs works", { test_that("diffSCEs works", { expect_length(difference, 5) - expect_true(all(c("Reference", "FeatureData", "PhenoData", "Plots", + expect_true(all(c("Reference", "RowData", "ColData", "Plots", "QQPlots") %in% names(difference))) checkmate::expect_class(difference$Reference, "SingleCellExperiment") - checkmate::expect_class(difference$PhenoData, "data.frame") - checkmate::expect_class(difference$FeatureData, "data.frame") + checkmate::expect_class(difference$ColData, "data.frame") + checkmate::expect_class(difference$RowData, "data.frame") expect_length(difference$Plots, 7) expect_true(all(c("Means", "Variances", "MeanVar", "LibrarySizes", "ZerosGene", "ZerosCell", "MeanZeros") %in% diff --git a/vignettes/splatter.Rmd b/vignettes/splatter.Rmd index af3eeff..be82bd3 100644 --- a/vignettes/splatter.Rmd +++ b/vignettes/splatter.Rmd @@ -121,7 +121,7 @@ The parameters required for the Splat simulation are briefly described here: * **Differential expression parameters** * `de.prob` - Probability that a gene is differentially expressed in each group or path. - * `de.loProb` - Probability that a differentially expressed gene is + * `de.downProb` - Probability that a differentially expressed gene is down-regulated. * `de.facLoc` - Location (meanlog) parameter for the differential expression factor log-normal distribution. @@ -298,12 +298,12 @@ For more details about the `SingleCellExperiment` object refer to the [vignette] The `splatSimulate` function outputs the following additional information about the simulation: -* **Cell information (`pData`)** +* **Cell information (`colData`)** * `Cell` - Unique cell identifier. * `Group` - The group or path the cell belongs to. * `ExpLibSize` - The expected library size for that cell. * `Step` (paths only) - How far along the path each cell is. -* **Gene information (`fData`)** +* **Gene information (`rowData`)** * `Gene` - Unique gene identifier. * `BaseGeneMean` - The base expression level for that gene. * `OutlierFactor` - Expression outlier factor for that gene (1 is not an @@ -313,7 +313,7 @@ the simulation: in a particular group (1 is not differentially expressed). * `GeneMean[Group]` - Expression level of a gene in a particular group after applying differential expression factors. -* **Gene by cell information (`assayData`)** +* **Gene by cell information (`assays`)** * `BaseCellMeans` - The expression of genes in each cell adjusted for expected library size. * `BCV` - The Biological Coefficient of Variation for each gene in @@ -489,7 +489,7 @@ names(comparison$Plots) ``` The returned list has three items. The first two are the combined datasets by -gene (`FeatureData`) and by cell (`PhenoData`) and the third contains some +gene (`RowData`) and by cell (`ColData`) and the third contains some comparison plots (produced using `ggplot2`), for example a plot of the distribution of means: @@ -503,7 +503,7 @@ number of expressed genes against the library size: ```{r comparison-libsize-features} library("ggplot2") -ggplot(comparison$PhenoData, +ggplot(comparison$ColData, aes(x = total_counts, y = total_features_by_counts, colour = Dataset)) + geom_point() ``` -- GitLab