Skip to content
Snippets Groups Projects
Commit ea656946 authored by Jeffrey Pullin's avatar Jeffrey Pullin
Browse files

Add more documentation to top genes functions

parent d4682c47
No related branches found
No related tags found
No related merge requests found
Pipeline #5972 passed
#' Get the top n genes from Seurat
#' Get the top genes from a marker gene selection method
#'
#' @param res
#' @param n The number of top genes to select
#' @param output The output from a supported marker gene selection method
#' @param n The number of top marker genes to return for each cluster
#' @param ... Other arguments passed to the specific functions for each method
#'
#' @return
#' @return A list of the top `n` marker genes for each cluster.
#'
#' @details ave_logFC
#' @details The currently supported marker gene selection methods are:
#' * `scran::FindMarkers`
#' * `Seurat::FindAllMarkers`
#'
get_top_genes <- function(output, n, ...) {
out <- switch(
output$pars$method,
"scran" = get_top_genes_scran(output, n, ...),
"seurat" = get_top_genes_seurat(output, n, ...),
stop("Invalid method", call. = FALSE)
)
out
}
#' Get the top n genes from Seurat's marker gene output
#'
#' @param output The output from the `Seurat::FindAllMarkers` function.
#' @param n The number of top genes to select.
#'
#' @return A list of the top `n` marker genes for each cluster.
#'
#' @details In order to break ties in the sorting of p-values (which may occur
#' when the p-values are all exactly 0) the genes are also sorted by
#' `"avg_logFC"` (largest first).
#'
#' When Seurat uses the `"roc"` test p-values are not generated. In this case
#' the genes are ranked by their AUC values.
#'
get_top_genes_seurat <- function(output, n) {
......@@ -16,9 +45,9 @@ get_top_genes_seurat <- function(output, n) {
# There are *no* p-values calculated for the ROC test.
if (output$pars$test.use == "roc") {
top_genes <- dplyr::arrange(top_genes, myAUC, avg_logFC)
top_genes <- dplyr::arrange(top_genes, myAUC, desc(avg_logFC))
} else {
top_genes <- dplyr::arrange(top_genes, p_val, avg_logFC)
top_genes <- dplyr::arrange(top_genes, p_val, desc(avg_logFC))
}
top_genes <- dplyr::slice(top_genes, 1:n)
......@@ -29,10 +58,30 @@ get_top_genes_seurat <- function(output, n) {
out
}
# TODO: Document
get_top_genes_scran <- function(output, n) {
#' Get the top n genes from scran's marker gene output
#'
#' @param output The output from the `scran::findMarkers` function.
#' @param n The number of top genes to select.
#' @param any_method The method for selecting the top genes used when
#' `pval.type = "any"`.
#'
#' @return A list of the top `n` marker genes for each cluster.
#'
#' @details When `pval.type = any` (the default) scran recommends choosing the
#' marker genes by filtering on the `Top` column of the output. From the
#' OSCA book: "The set of genes with Top <= x is the union of the top x genes
#' (ranked by p-value) from each pairwise comparison". Selecting any given
#' value of Top to filter on will therefore not guarantee that exactly
#' n genes will be returned. Currently, we apply a rought heuristic that
#' selects `Top` so that the number of returned genes is approximately `n`.
#' This will definitely need to be refined in the future. Another option
#' would be to use the returned p-value in this case which is calculated by
#' applying Simes’ method to the pairwise p-values pairwise comparison.
#'
get_top_genes_scran <- function(output, n, any_method = "top_heuristic") {
stopifnot(output$pars$method == "scran")
stopifnot(any_method == "top_heuristic")
genes <- output$result
n_clusters <- length(genes)
......@@ -42,9 +91,13 @@ get_top_genes_scran <- function(output, n) {
x <- as.data.frame(x)
if (output$pars$pval.type == "any") {
# TODO: Think more about this:
# TODO: Think more about this heuristic.
# Empirically, this produces approximately the right number of genes.
out <- filter(x, Top < (n / n_clusters) * 2)
if (any_method == "top_heuristic") {
out <- filter(x, Top < (n / n_clusters) * 2)
}
} else {
out <- dplyr::arrange(x, p.value)
out <- dplyr::slice(x, 1:n)
......@@ -56,14 +109,4 @@ get_top_genes_scran <- function(output, n) {
top_genes
}
get_top_genes <- function(output, n) {
out <- switch(
output$pars$method,
"scran" = get_top_genes_scran(output, n),
"seurat" = get_top_genes_seurat(output, n),
stop("Invalid method", call. = FALSE)
)
out
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment