Skip to content
Snippets Groups Projects
Commit 6935d277 authored by Jeffrey Pullin's avatar Jeffrey Pullin
Browse files

Speed up lm method

parent add2bc55
No related branches found
No related tags found
No related merge requests found
Pipeline #10047 passed
#' Find marker genes using a linear model implemented via `lm`.
#'
#' Wraps the `stats::lm` function.
#' Find marker genes using a linear model's i.e. Student's t-test.
#'
#' @param sce A SingleCellExperiment object
#' @param pars parameters passed to the function.
......@@ -17,70 +15,47 @@ run_lm <- function(sce, pars) {
n_groups <- length(unique(colLabels(sce)))
n_genes <- nrow(sce)
x_ungrouped <- factor(as.numeric(colLabels(sce)))
out <- vector("list", length = n_groups)
for (i in seq_len(n_groups)) {
print("Group!")
group <- levels(colLabels(sce))[[i]]
x <- factor(as.numeric(colLabels(sce) == group))
group_out <- vector("list", length = n_genes)
for (j in seq_len(n_genes)) {
print(paste0("Gene", j))
gene <- rownames(sce)[[j]]
y <- as.vector(logcounts(sce[j, ]))
if (pars$covariate == "two_sample") {
grouped_model <- lm(y ~ x)
coefs <- summary(grouped_model)$coefficients
# Extract values from the fitted model.
p_value <- coefs[2, 4]
# Bonferonni correction.
p_value_adj <- p_value / n_genes
t_value <- coefs[2, 3]
score <- coefs[2, 1]
} else if (pars$covariate == "ungrouped") {
# FIXME: Hack to prevent computation problems.
y_ungrouped <- y + 1
ungrouped_model <- lm(y_ungrouped ~ 0 + x_ungrouped)
k <- n_groups - 1
K_vec <- rep(-1/k, n_groups)
K_vec[[j]] <- 1
K <- matrix(K_vec, nrow = 1)
contrast_fit <- multcomp::glht(ungrouped_model, linfct = K)
summary_fit <- summary(contrast_fit)
p_value <- summary_fit$test$pvalues[[1]]
p_value_adj <- p_value / n_genes
t_value <- summary_fit$test$tstat[[1]]
# FIXME What should this be?
score <- 0
} else {
stop("Invaild pars value.", call = FALSE)
}
t_stat <- function(X) {
m <- rowMeans(X)
n <- ncol(X)
var <- rowSums((X - m) ^ 2)
group_out[[j]] <- list(
p_value = p_value,
raw_statistic = score,
log_fc = 0,
p_value_adj = p_value_adj,
gene = gene,
cluster = group,
scaled_statistic = t_value
)
group_df <- dplyr::bind_rows(!!!group_out)
# This is implicitly Student's t-test so the degrees of freedom
# will be same between all genes so sorting on the scaled statistic
# i.e. the t-value is okay.
group_df <- dplyr::arrange(group_df, desc(abs(scaled_statistic)))
list(m = m, n = n, var = var)
}
out[[i]] <- group_df
g_1 <- t_stat(logcounts(sce)[, x == 1])
g_2 <- t_stat(logcounts(sce)[, x == 0])
num <- g_1$m - g_2$m
s2 <- (g_1$var + g_2$var) / (g_1$n + g_2$n - 2)
denom <- sqrt(s2 * (1 / g_1$n + 1 / g_2$n))
scaled_statistic <- num / denom
p_value <- pt(abs(scaled_statistic), df = g_1$n + g_2$n - 2,
lower.tail = FALSE)
p_value_adj <- p_value / n_genes
group_out <- tibble::tibble(
raw_statistic = num,
scaled_statistic = scaled_statistic,
p_value = p_value,
p_value_adj = p_value_adj,
log_fc = rep(0, n_genes),
gene = rownames(sce),
cluster = group
)
# This is implicitly Student's t-test so the degrees of freedom
# will be same between all genes so sorting on the scaled statistic
# i.e. the t-value is okay.
group_out <- dplyr::arrange(group_out, desc(abs(scaled_statistic)))
out[[i]] <- group_out
}
# Group the results for each group together, preserving the within group
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment