From 4ff20b7440e2c81915e9551ae8fa341f8d1e49d3 Mon Sep 17 00:00:00 2001
From: Luke Zappia <lazappi@users.noreply.github.com>
Date: Mon, 15 Apr 2019 15:28:58 +1000
Subject: [PATCH] Minor changes to documentation

Fixes #64

Also remove lingering references to phenoData and featureData
---
 R/AllClasses.R                |  2 +-
 R/compare.R                   | 89 ++++++++++++++++++-----------------
 R/splat-simulate.R            |  6 +--
 man/SplatParams.Rd            |  2 +-
 man/compareSCEs.Rd            |  4 +-
 man/diffSCEs.Rd               |  4 +-
 man/splatSimulate.Rd          |  6 +--
 tests/testthat/test-compare.R | 12 ++---
 vignettes/splatter.Rmd        | 12 ++---
 9 files changed, 71 insertions(+), 66 deletions(-)

diff --git a/R/AllClasses.R b/R/AllClasses.R
index dff138a..eb6a414 100644
--- a/R/AllClasses.R
+++ b/R/AllClasses.R
@@ -126,7 +126,7 @@ setClass("SimpleParams",
 #'         \describe{
 #'             \item{\code{[de.prob]}}{Probability that a gene is differentially
 #'             expressed in a group. Can be a vector.}
-#'             \item{\code{[de.loProb]}}{Probability that a differentially
+#'             \item{\code{[de.downProb]}}{Probability that a differentially
 #'             expressed gene is down-regulated. Can be a vector.}
 #'             \item{\code{[de.facLoc]}}{Location (meanlog) parameter for the
 #'             differential expression factor log-normal distribution. Can be a
diff --git a/R/compare.R b/R/compare.R
index 3743e64..88fede5 100644
--- a/R/compare.R
+++ b/R/compare.R
@@ -14,9 +14,9 @@
 #' The returned list has three items:
 #'
 #' \describe{
-#'     \item{\code{FeatureData}}{Combined feature data from the provided
+#'     \item{\code{RowData}}{Combined row data from the provided
 #'     SingleCellExperiments.}
-#'     \item{\code{PhenoData}}{Combined pheno data from the provided
+#'     \item{\code{ColData}}{Combined column data from the provided
 #'     SingleCellExperiments.}
 #'     \item{\code{Plots}}{Comparison plots
 #'         \describe{
@@ -177,8 +177,8 @@ compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1,
         mean.zeros <- mean.zeros + geom_smooth()
     }
 
-    comparison <- list(FeatureData = features,
-                       PhenoData = cells,
+    comparison <- list(RowData = features,
+                       ColData = cells,
                        Plots = list(Means = means,
                                     Variances = vars,
                                     MeanVar = mean.var,
@@ -217,9 +217,9 @@ compareSCEs <- function(sces, point.size = 0.1, point.alpha = 0.1,
 #'
 #' \describe{
 #'     \item{\code{Reference}}{The SingleCellExperiment used as the reference.}
-#'     \item{\code{FeatureData}}{Combined feature data from the provided
+#'     \item{\code{RowData}}{Combined feature data from the provided
 #'     SingleCellExperiments.}
-#'     \item{\code{PhenoData}}{Combined pheno data from the provided
+#'     \item{\code{ColData}}{Combined column data from the provided
 #'     SingleCellExperiments.}
 #'     \item{\code{Plots}}{Difference plots
 #'         \describe{
@@ -509,8 +509,8 @@ diffSCEs <- function(sces, ref, point.size = 0.1, point.alpha = 0.1,
     }
 
     comparison <- list(Reference = ref.sce,
-                       FeatureData = features,
-                       PhenoData = cells,
+                       RowData = features,
+                       ColData = cells,
                        Plots = list(Means = means,
                                     Variances = vars,
                                     MeanVar = mean.var,
@@ -824,10 +824,10 @@ makeOverallPanel <- function(comp, diff, title = "Overall comparison",
 #' @export
 summariseDiff <- function(diff) {
 
-    datasets <- unique(diff$PhenoData$Dataset)
+    datasets <- unique(diff$ColData$Dataset)
 
-    fData.mads <- sapply(datasets, function(dataset) {
-        df <- diff$FeatureData[diff$FeatureData$Dataset == dataset, ]
+    rowData.mads <- sapply(datasets, function(dataset) {
+        df <- diff$RowData[diff$RowData$Dataset == dataset, ]
         mean <- median(abs(df$RankDiffMeanLogCPM))
         var <- median(abs(df$RankDiffVarLogCPM))
         zeros <- median(abs(df$RankDiffZeros))
@@ -836,27 +836,29 @@ summariseDiff <- function(diff) {
         return(c(Mean = mean, Variance = var, ZerosGene = zeros,
                  MeanVar = mean.var, MeanZeros = mean.zeros))
     })
-    fData.mads.z <- t(scale(t(fData.mads)))
+    rowData.mads.z <- t(scale(t(rowData.mads)))
 
-    pData.mads <- sapply(datasets, function(dataset) {
-        df <- diff$PhenoData[diff$PhenoData$Dataset == dataset, ]
+    colData.mads <- sapply(datasets, function(dataset) {
+        df <- diff$ColData[diff$ColData$Dataset == dataset, ]
         lib.size <- median(abs(df$RankDiffLibSize))
         zeros <- median(abs(df$RankDiffZeros))
         return(c(LibSize = lib.size, ZerosCell = zeros))
     })
-    pData.mads.z <- t(scale(t(pData.mads)))
+    colData.mads.z <- t(scale(t(colData.mads)))
 
-    mads <- data.frame(Dataset = datasets, t(fData.mads), t(pData.mads))
-    mads.z <- data.frame(Dataset = datasets, t(fData.mads.z), t(pData.mads.z))
+    mads <- data.frame(Dataset = datasets, t(rowData.mads), t(colData.mads))
+    mads.z <- data.frame(Dataset = datasets, t(rowData.mads.z),
+                         t(colData.mads.z))
 
-    fData.ranks <- matrixStats::rowRanks(fData.mads)
-    pData.ranks <- matrixStats::rowRanks(pData.mads)
+    rowData.ranks <- matrixStats::rowRanks(rowData.mads)
+    colData.ranks <- matrixStats::rowRanks(colData.mads)
 
-    ranks.mads <- data.frame(Dataset = datasets, t(fData.ranks), t(pData.ranks))
+    ranks.mads <- data.frame(Dataset = datasets, t(rowData.ranks),
+                             t(colData.ranks))
     colnames(ranks.mads) <- paste0(colnames(mads), "Rank")
 
-    fData.maes <- sapply(datasets, function(dataset) {
-        df <- diff$FeatureData[diff$FeatureData$Dataset == dataset, ]
+    rowData.maes <- sapply(datasets, function(dataset) {
+        df <- diff$RowData[diff$RowData$Dataset == dataset, ]
         mean <- mean(abs(df$RankDiffMeanLogCPM))
         var <- mean(abs(df$RankDiffVarLogCPM))
         zeros <- mean(abs(df$RankDiffZeros))
@@ -865,27 +867,28 @@ summariseDiff <- function(diff) {
         return(c(Mean = mean, Variance = var, ZerosGene = zeros,
                  MeanVar = mean.var, MeanZeros = mean.zeros))
     })
-    fData.maes.z <- t(scale(t(fData.maes)))
+    rowData.maes.z <- t(scale(t(rowData.maes)))
 
-    pData.maes <- sapply(datasets, function(dataset) {
-        df <- diff$PhenoData[diff$PhenoData$Dataset == dataset, ]
+    colData.maes <- sapply(datasets, function(dataset) {
+        df <- diff$ColData[diff$ColData$Dataset == dataset, ]
         lib.size <- mean(abs(df$RankDiffLibSize))
         zeros <- mean(abs(df$RankDiffZeros))
         return(c(LibSize = lib.size, ZerosCell = zeros))
     })
-    pData.maes.z <- t(scale(t(pData.maes)))
+    colData.maes.z <- t(scale(t(colData.maes)))
 
-    maes <- data.frame(Dataset = datasets, t(fData.maes), t(pData.maes))
-    maes.z <- data.frame(Dataset = datasets, t(fData.maes.z), t(pData.maes.z))
+    maes <- data.frame(Dataset = datasets, t(rowData.maes), t(colData.maes))
+    maes.z <- data.frame(Dataset = datasets, t(rowData.maes.z),
+                         t(colData.maes.z))
 
-    fData.ranks <- matrixStats::rowRanks(fData.maes)
-    pData.ranks <- matrixStats::rowRanks(pData.maes)
+    rowData.ranks <- matrixStats::rowRanks(rowData.maes)
+    colData.ranks <- matrixStats::rowRanks(colData.maes)
 
-    ranks.maes <- data.frame(Dataset = datasets, t(fData.ranks), t(pData.ranks))
+    ranks.maes <- data.frame(Dataset = datasets, t(rowData.ranks), t(colData.ranks))
     colnames(ranks.maes) <- paste0(colnames(mads), "Rank")
 
-    fData.rmse <- sapply(datasets, function(dataset) {
-        df <- diff$FeatureData[diff$FeatureData$Dataset == dataset, ]
+    rowData.rmse <- sapply(datasets, function(dataset) {
+        df <- diff$RowData[diff$RowData$Dataset == dataset, ]
         mean <- sqrt(mean(df$RankDiffMeanLogCPM ^ 2))
         var <- sqrt(mean(df$RankDiffVarLogCPM ^ 2))
         zeros <- sqrt(mean(df$RankDiffZeros ^ 2))
@@ -894,23 +897,25 @@ summariseDiff <- function(diff) {
         return(c(Mean = mean, Variance = var, ZerosGene = zeros,
                  MeanVar = mean.var, MeanZeros = mean.zeros))
     })
-    fData.rmse.z <- t(scale(t(fData.rmse)))
+    rowData.rmse.z <- t(scale(t(rowData.rmse)))
 
-    pData.rmse <- sapply(datasets, function(dataset) {
-        df <- diff$PhenoData[diff$PhenoData$Dataset == dataset, ]
+    colData.rmse <- sapply(datasets, function(dataset) {
+        df <- diff$ColData[diff$ColData$Dataset == dataset, ]
         lib.size <- sqrt(mean(df$RankDiffLibSize ^ 2))
         zeros <- sqrt(mean(df$RankDiffZeros ^ 2))
         return(c(LibSize = lib.size, ZerosCell = zeros))
     })
-    pData.rmse.z <- t(scale(t(pData.rmse)))
+    colData.rmse.z <- t(scale(t(colData.rmse)))
 
-    rmse <- data.frame(Dataset = datasets, t(fData.rmse), t(pData.rmse))
-    rmse.z <- data.frame(Dataset = datasets, t(fData.rmse.z), t(pData.rmse.z))
+    rmse <- data.frame(Dataset = datasets, t(rowData.rmse), t(colData.rmse))
+    rmse.z <- data.frame(Dataset = datasets, t(rowData.rmse.z),
+                         t(colData.rmse.z))
 
-    fData.ranks <- matrixStats::rowRanks(fData.rmse)
-    pData.ranks <- matrixStats::rowRanks(pData.rmse)
+    rowData.ranks <- matrixStats::rowRanks(rowData.rmse)
+    colData.ranks <- matrixStats::rowRanks(colData.rmse)
 
-    ranks.rmse <- data.frame(Dataset = datasets, t(fData.ranks), t(pData.ranks))
+    ranks.rmse <- data.frame(Dataset = datasets, t(rowData.ranks),
+                             t(colData.ranks))
     colnames(ranks.rmse) <- paste0(colnames(rmse), "Rank")
 
     mads <- stats::reshape(mads, varying = 2:8, direction = "long",
diff --git a/R/splat-simulate.R b/R/splat-simulate.R
index fbf9ff1..6c9a0e7 100644
--- a/R/splat-simulate.R
+++ b/R/splat-simulate.R
@@ -42,7 +42,7 @@
 #' \code{\link{assays}} (for gene by cell matrices) slots. This additional
 #' information includes:
 #' \describe{
-#'     \item{\code{phenoData}}{
+#'     \item{\code{colData}}{
 #'         \describe{
 #'             \item{Cell}{Unique cell identifier.}
 #'             \item{Group}{The group or path the cell belongs to.}
@@ -50,7 +50,7 @@
 #'             \item{Step (paths only)}{how far along the path each cell is.}
 #'         }
 #'     }
-#'     \item{\code{featureData}}{
+#'     \item{\code{rowData}}{
 #'         \describe{
 #'             \item{Gene}{Unique gene identifier.}
 #'             \item{BaseGeneMean}{The base expression level for that gene.}
@@ -66,7 +66,7 @@
 #'             non-linear changes in expression along a path.}
 #'         }
 #'     }
-#'     \item{\code{assayData}}{
+#'     \item{\code{assays}}{
 #'         \describe{
 #'             \item{BatchCellMeans}{The mean expression of genes in each cell
 #'             after adding batch effects.}
diff --git a/man/SplatParams.Rd b/man/SplatParams.Rd
index fbd581d..23b089c 100644
--- a/man/SplatParams.Rd
+++ b/man/SplatParams.Rd
@@ -70,7 +70,7 @@ The Splatter simulation requires the following parameters:
         \describe{
             \item{\code{[de.prob]}}{Probability that a gene is differentially
             expressed in a group. Can be a vector.}
-            \item{\code{[de.loProb]}}{Probability that a differentially
+            \item{\code{[de.downProb]}}{Probability that a differentially
             expressed gene is down-regulated. Can be a vector.}
             \item{\code{[de.facLoc]}}{Location (meanlog) parameter for the
             differential expression factor log-normal distribution. Can be a
diff --git a/man/compareSCEs.Rd b/man/compareSCEs.Rd
index 711a2be..f0408d0 100644
--- a/man/compareSCEs.Rd
+++ b/man/compareSCEs.Rd
@@ -30,9 +30,9 @@ basic plots comparing them.
 The returned list has three items:
 
 \describe{
-    \item{\code{FeatureData}}{Combined feature data from the provided
+    \item{\code{RowData}}{Combined row data from the provided
     SingleCellExperiments.}
-    \item{\code{PhenoData}}{Combined pheno data from the provided
+    \item{\code{ColData}}{Combined column data from the provided
     SingleCellExperiments.}
     \item{\code{Plots}}{Comparison plots
         \describe{
diff --git a/man/diffSCEs.Rd b/man/diffSCEs.Rd
index a24a5c2..3e75cb6 100644
--- a/man/diffSCEs.Rd
+++ b/man/diffSCEs.Rd
@@ -41,9 +41,9 @@ The returned list has five items:
 
 \describe{
     \item{\code{Reference}}{The SingleCellExperiment used as the reference.}
-    \item{\code{FeatureData}}{Combined feature data from the provided
+    \item{\code{RowData}}{Combined feature data from the provided
     SingleCellExperiments.}
-    \item{\code{PhenoData}}{Combined pheno data from the provided
+    \item{\code{ColData}}{Combined column data from the provided
     SingleCellExperiments.}
     \item{\code{Plots}}{Difference plots
         \describe{
diff --git a/man/splatSimulate.Rd b/man/splatSimulate.Rd
index 5385187..578477f 100644
--- a/man/splatSimulate.Rd
+++ b/man/splatSimulate.Rd
@@ -67,7 +67,7 @@ information), \code{\link{rowData}} (for gene specific information) or
 \code{\link{assays}} (for gene by cell matrices) slots. This additional
 information includes:
 \describe{
-    \item{\code{phenoData}}{
+    \item{\code{colData}}{
         \describe{
             \item{Cell}{Unique cell identifier.}
             \item{Group}{The group or path the cell belongs to.}
@@ -75,7 +75,7 @@ information includes:
             \item{Step (paths only)}{how far along the path each cell is.}
         }
     }
-    \item{\code{featureData}}{
+    \item{\code{rowData}}{
         \describe{
             \item{Gene}{Unique gene identifier.}
             \item{BaseGeneMean}{The base expression level for that gene.}
@@ -91,7 +91,7 @@ information includes:
             non-linear changes in expression along a path.}
         }
     }
-    \item{\code{assayData}}{
+    \item{\code{assays}}{
         \describe{
             \item{BatchCellMeans}{The mean expression of genes in each cell
             after adding batch effects.}
diff --git a/tests/testthat/test-compare.R b/tests/testthat/test-compare.R
index 04d6520..29540df 100644
--- a/tests/testthat/test-compare.R
+++ b/tests/testthat/test-compare.R
@@ -8,10 +8,10 @@ difference <- diffSCEs(list(Splat = sim1, Simple = sim2), ref = "Simple")
 
 test_that("compareSCEs works", {
     expect_length(comparison, 3)
-    expect_true(all(c("FeatureData", "PhenoData", "Plots") %in%
+    expect_true(all(c("RowData", "ColData", "Plots") %in%
                         names(comparison)))
-    checkmate::expect_class(comparison$PhenoData, "data.frame")
-    checkmate::expect_class(comparison$FeatureData, "data.frame")
+    checkmate::expect_class(comparison$ColData, "data.frame")
+    checkmate::expect_class(comparison$RowData, "data.frame")
     expect_length(comparison$Plots, 7)
     expect_true(all(c("Means", "Variances", "MeanVar", "LibrarySizes",
                       "ZerosGene", "ZerosCell", "MeanZeros") %in%
@@ -23,11 +23,11 @@ test_that("compareSCEs works", {
 
 test_that("diffSCEs works", {
     expect_length(difference, 5)
-    expect_true(all(c("Reference", "FeatureData", "PhenoData", "Plots",
+    expect_true(all(c("Reference", "RowData", "ColData", "Plots",
                       "QQPlots") %in% names(difference)))
     checkmate::expect_class(difference$Reference, "SingleCellExperiment")
-    checkmate::expect_class(difference$PhenoData, "data.frame")
-    checkmate::expect_class(difference$FeatureData, "data.frame")
+    checkmate::expect_class(difference$ColData, "data.frame")
+    checkmate::expect_class(difference$RowData, "data.frame")
     expect_length(difference$Plots, 7)
     expect_true(all(c("Means", "Variances", "MeanVar", "LibrarySizes",
                       "ZerosGene", "ZerosCell", "MeanZeros") %in%
diff --git a/vignettes/splatter.Rmd b/vignettes/splatter.Rmd
index af3eeff..be82bd3 100644
--- a/vignettes/splatter.Rmd
+++ b/vignettes/splatter.Rmd
@@ -121,7 +121,7 @@ The parameters required for the Splat simulation are briefly described here:
 * **Differential expression parameters**
     * `de.prob` - Probability that a gene is differentially expressed in each
       group or path.
-    * `de.loProb` - Probability that a differentially expressed gene is
+    * `de.downProb` - Probability that a differentially expressed gene is
       down-regulated.
     * `de.facLoc` - Location (meanlog) parameter for the differential expression
       factor log-normal distribution.
@@ -298,12 +298,12 @@ For more details about the `SingleCellExperiment` object refer to the [vignette]
 The `splatSimulate` function outputs the following additional information about
 the simulation:
 
-* **Cell information (`pData`)**
+* **Cell information (`colData`)**
     * `Cell` - Unique cell identifier.
     * `Group` - The group or path the cell belongs to.
     * `ExpLibSize` - The expected library size for that cell.
     * `Step` (paths only) - How far along the path each cell is.
-* **Gene information (`fData`)**
+* **Gene information (`rowData`)**
     * `Gene` - Unique gene identifier.
     * `BaseGeneMean` - The base expression level for that gene.
     * `OutlierFactor` - Expression outlier factor for that gene (1 is not an
@@ -313,7 +313,7 @@ the simulation:
       in a particular group (1 is not differentially expressed).
     * `GeneMean[Group]` - Expression level of a gene in a particular group after
       applying differential expression factors.
-* **Gene by cell information (`assayData`)**
+* **Gene by cell information (`assays`)**
     * `BaseCellMeans` - The expression of genes in each cell adjusted for
       expected library size.
     * `BCV` - The Biological Coefficient of Variation for each gene in
@@ -489,7 +489,7 @@ names(comparison$Plots)
 ```
 
 The returned list has three items. The first two are the combined datasets by
-gene (`FeatureData`) and by cell (`PhenoData`) and the third contains some
+gene (`RowData`) and by cell (`ColData`) and the third contains some
 comparison plots (produced using `ggplot2`), for example a plot of the
 distribution of means:
 
@@ -503,7 +503,7 @@ number of expressed genes against the library size:
 
 ```{r comparison-libsize-features}
 library("ggplot2")
-ggplot(comparison$PhenoData,
+ggplot(comparison$ColData,
        aes(x = total_counts, y = total_features_by_counts, colour = Dataset)) +
     geom_point()
 ```
-- 
GitLab