Skip to content
Snippets Groups Projects
Commit b895f6ed authored by Jeffrey Pullin's avatar Jeffrey Pullin
Browse files

Add cluster analysis

parent e45e05d1
No related branches found
No related tags found
No related merge requests found
Pipeline #6393 passed
......@@ -16,10 +16,18 @@ library(ggupset)
library(ggplot2)
library(tidyr)
library(SingleCellExperiment)
library(logisticPCA)
library(pals)
```
```{r load-data}
# Load project helper scripts; paths are built relative to the project root
# via here::here().
source(here::here("code", "top-genes.R"))
source(here::here("code", "analysis-utils.R"))
# Read the project configuration; `results_folder` and `sim_data_folder`
# are the entries used below.
config <- yaml::read_yaml(here::here("config.yaml"))
# List every file in the results and simulated-data folders.
# NOTE(review): list.files() resolves a relative folder against the current
# working directory, whereas here::here() anchors at the project root --
# confirm this chunk is always knitted from the project root.
res_paths <- here::here(list.files(config$results_folder, full.names = TRUE))
sim_paths <- here::here(list.files(config$sim_data_folder, full.names = TRUE))
# Empty container, presumably filled with per-result top genes by the code
# that follows (not visible here).
top_genes <- list()
# First file is the countsimQC report.
......@@ -43,17 +51,61 @@ concordance_data <- retrive_simulation_parameters() %>%
```
```{r plot-all-10}
# Build, for every gene, the set of `pars` values whose top-gene list contains
# it, then draw an UpSet-style bar chart of those sets.
#
# The pipeline keeps rows with sim_label "standard_sim" and rep 2, takes the
# first component of each row's `top_genes` entry, expands it to one row per
# (pars, gene) pair and re-nests by gene so that `data` becomes a list-column
# holding the `pars` values for that gene.
test <- concordance_data %>%
  filter(sim_label == "standard_sim" & rep == 2) %>%
  select(pars, top_genes) %>%
  rowwise() %>%
  mutate(top_genes = list(top_genes[[1]])) %>%
  unnest(top_genes) %>%
  nest_by(top_genes) %>%
  mutate(data = list(data[["pars"]])) %>%
  # nest_by() returns a rowwise data frame; drop that before plotting.
  # The result must stay a data frame (not a plain list) because ggplot()
  # needs a data frame and scale_x_upset() needs a list-column aesthetic.
  ungroup()

test %>%
  ggplot(aes(x = data)) +
  geom_bar() +
  scale_x_upset()
```
Clustering was first tried on all genes, but logisticPCA ran into convergence issues, so only the group 1 genes are used below.
```{r create-clustering-data}
# Build a (pars x gene) indicator matrix for the clustering plots.
#
# `long_data` has one row per (pars, gene) pair for the rows with sim_label
# "standard_sim" and rep 1.
long_data <- concordance_data %>%
  rowwise() %>%
  filter(sim_label == "standard_sim" & rep == 1) %>%
  select(pars, genes = top_genes) %>%
  # Only use group 1 genes.
  mutate(genes = list(genes[[1]])) %>%
  ungroup() %>%
  unnest_longer(col = genes)

# One indicator column per distinct gene; `+ 0` removes the intercept so
# every gene gets its own column.
binary_data <- model.matrix(~ . + 0, data = long_data["genes"])

# Collapse to one row per `pars` by summing the indicator columns.
cluster_data <- cbind(pars = long_data$pars, as.data.frame(binary_data)) %>%
  group_by(pars) %>%
  summarise(across(everything(), sum))

# Drop the leading `pars` column to get a purely numeric matrix.
# (Previously read from the undefined object `test_data`.)
cluster_mat <- as.matrix(cluster_data[, -1])

# Sanity checking.
# rowSums(cluster_mat)
# colSums(cluster_mat)
# max(cluster_mat)
```
```{r plot-pca}
# Ordinary PCA of the cluster matrix. The first two score columns are plotted
# as a scatter plot, one point per row of `cluster_data`, coloured by `pars`.
pca <- prcomp(cluster_mat)
ggplot(
  data.frame(
    pc1 = pca$x[, 1],
    pc2 = pca$x[, 2],
    pars = cluster_data$pars
  ),
  aes(x = pc1, y = pc2, colour = pars)
) +
  geom_point() +
  # unname() so the palette is matched to `pars` by position, not by name.
  scale_colour_manual(values = unname(polychrome(20))) +
  theme_bw()
```
```{r plot-logisticpca}
# Logistic PCA of the cluster matrix, plotted the same way as the ordinary
# PCA: the first two principal-component scores, coloured by `pars`.
# The chunk header needs the `r` engine; without it knitr does not execute
# the chunk.
log_pca <- logisticPCA(cluster_mat)
data.frame(
  pc1 = log_pca$PCs[, 1],
  pc2 = log_pca$PCs[, 2],
  pars = cluster_data$pars
) %>%
  ggplot(aes(pc1, pc2, colour = pars)) +
  geom_point() +
  # unname() so the palette is matched to `pars` by position, not by name.
  scale_colour_manual(values = unname(polychrome(20))) +
  theme_bw()
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment