Skip to content
Snippets Groups Projects
Commit 7dfe8aad authored by Jeffrey Pullin's avatar Jeffrey Pullin
Browse files

Add calculation of log fold-change for methods which do not calculate it

parent 708cfdd8
No related branches found
No related tags found
No related merge requests found
Pipeline #9833 passed
......@@ -49,10 +49,12 @@ run_cepo <- function(sce, pars) {
)
)
lfcs <- calculate_log_fc(sce, genes = result$gene, clusters = result$cluster)
result <- dplyr::mutate(
result,
raw_statistic = 0,
log_fc = 0,
log_fc = lfcs,
scaled_statistic = 0,
p_value = 0,
p_value_adj = 0
......
......@@ -74,16 +74,25 @@ run_nsforest <- function(sce, pars) {
result <- tidyr::unnest(markers, cols = everything())
# Remove the added letter
if (is_zeisel(sce) || is_paul(sce)) {
# Remove the added letter from both the selected genes and the original SCE.
if (is_zeisel(sce) || is_paul(sce) || is_astrocyte(sce)) {
result <- dplyr::mutate(result, gene = substr(gene, 2, nchar(gene)))
rownames(sce) <- substr(rownames(sce), 2, nchar(rownames(sce)))
}
result <- dplyr::arrange(result, cluster)
# FIXME: NSForest has a *very* strange tendency to sometimes return marker
# genes which are not present in the original SingleCellExperiment object...
# For now we simply consider this a software failure and remove those genes
# from the returned marker genes.
result <- dplyr::filter(result, gene %in% rownames(sce))
lfcs <- calculate_log_fc(sce, genes = result$gene, clusters = result$cluster)
result <- dplyr::mutate(
result,
log_fc = 0,
log_fc = lfcs,
raw_statistic = 0,
scaled_statistic = 0,
p_value = 0,
......
......@@ -61,7 +61,13 @@ run_rankcorr <- function(sce, pars) {
result <- tibble::tibble(
cluster = rep(names(lookup), lengths(mgs)),
gene = unlist(mgs),
log_fc = 0,
)
lfcs <- calculate_log_fc(sce, genes = result$gene, clusters = result$cluster)
result <- dplyr::mutate(result,
result,
log_fc = lfcs,
raw_statistic = 0,
scaled_statistic = 0,
p_value = 0,
......
......@@ -94,3 +94,62 @@ validate_mgs_result <- function(result) {
}
}
#' Calculate log fold change
#'
#' @param sce A SingleCellExperiment object
#' @param genes A vector of gene names
#' @param clusters A vector of cluster names
#'
#' @return A vector of one-vs-rest log_2 fold changes between the cells in the
#' cluster of interest and those in all other clusters.
#'
#' @details We follow Seurat's calculation of the log fold change.
#'
calculate_log_fc <- function(sce, genes, clusters) {
stopifnot(is(sce, "SingleCellExperiment"))
stopifnot(all(genes %in% rownames(sce)))
# FIXME: Cepo will mangle some clusters names.
#stopifnot(all(clusters %in% colLabels(sce)))
stopifnot(length(genes) == length(clusters))
unique_clusters <- unique(clusters)
unique_genes <- unique(genes)
data <- exp(logcounts(sce[unique_genes, ])) - 1
out <- tibble::tibble(gene = genes, cluster = clusters)
result <- tibble::tibble()
for (cluster in unique_clusters) {
cluster_ind <- colLabels(sce) == cluster
cluster_data <- data[, cluster_ind]
non_cluster_data <- data[, !cluster_ind]
num <- rowMeans(cluster_data) + 1
denom <- rowMeans(non_cluster_data) + 1
lfcs <- log(num, base = 2) - log(denom, base = 2)
cluster_result <- tibble::tibble(
gene = unique_genes,
cluster = cluster,
lfc = lfcs
)
result <- dplyr::bind_rows(result, cluster_result)
}
out <- dplyr::left_join(out, result, by = c("gene", "cluster"))
out$lfc
}
# tibble(gene = result$gene, lfcs, cluster = result$cluster) %>%
# filter(gene == "HLA-DRB1")
#
# test$result %>%
# select(gene, cluster, log_fc) %>%
# filter(gene == "HLA-DRB1")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment