Skip to content
Snippets Groups Projects
Commit 257749a2 authored by Jeffrey Pullin's avatar Jeffrey Pullin
Browse files

Add CITE-seq dataset

parent 22d56804
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,7 @@ suppressMessages({
library(SingleCellExperiment)
library(readr)
library(dplyr)
library(Seurat)
})
raw_lawlor <- LawlorPancreasData()
......@@ -101,3 +102,48 @@ colData(ss3_pbmc) <- colData(ss3_pbmc) |>
DataFrame()
saveRDS(ss3_pbmc, here::here("data", "raw_data", "ss3_pbmc.rds"))
# CITE-seq data ----
# Adapted from the Seurat tutorial:
# https://satijalab.org/seurat/articles/multimodal_vignette.html

# Read one downloaded GSE100866 UMI table (a gzipped CSV whose first column
# supplies the row names) from data/downloaded_data and return it as a sparse
# matrix.
read_cbmc_umi <- function(file_name) {
  umi_counts <- read.csv(
    file = here::here("data", "downloaded_data", file_name),
    sep = ",",
    header = TRUE,
    row.names = 1
  )
  as.sparse(umi_counts)
}

# Load in the RNA UMI matrix.
cbmc_rna <- read_cbmc_umi("GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz")

# Discard all but the top 100 most highly expressed mouse genes, and remove the
# 'HUMAN_' prefix from the remaining CITE-seq gene names.
cbmc_rna <- CollapseSpeciesExpressionMatrix(cbmc_rna)

# Load in the ADT UMI matrix.
cbmc_adt <- read_cbmc_umi("GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz")

# Build the Seurat object from the RNA counts and attach the ADT counts as a
# second assay named "ADT".
cbmc <- CreateSeuratObject(counts = cbmc_rna)
cbmc[["ADT"]] <- CreateAssayObject(counts = cbmc_adt)

# Normalise, pick variable features, scale and run PCA.
cbmc <- NormalizeData(cbmc)
cbmc <- FindVariableFeatures(cbmc)
cbmc <- ScaleData(cbmc)
cbmc <- RunPCA(cbmc, verbose = FALSE)

# Cluster and embed using the first 30 principal components.
cbmc <- FindNeighbors(cbmc, dims = 1:30)
cbmc <- FindClusters(cbmc, resolution = 0.2, verbose = FALSE)
cbmc <- RunUMAP(cbmc, dims = 1:30)
cbmc <- RunTSNE(cbmc, dims = 1:30)

# Only draw the diagnostic plot in interactive sessions: when the script is run
# in batch mode an auto-printed plot would be written to a stray Rplots.pdf.
if (interactive()) {
  print(DimPlot(cbmc, label = TRUE))
}

# Convert to a SingleCellExperiment and save it as the raw `citeseq` dataset.
citeseq <- as.SingleCellExperiment(cbmc)
saveRDS(citeseq, here::here("data", "raw_data", "citeseq.rds"))
# This script prepares and saves the `citeseq` data.
#
# Required command-line arguments (e.g. `--raw_data=<path> --data=<path>`):
#   raw_data  Path of the RDS file holding the raw `citeseq` object.
#   data      Path the prepared `citeseq` object is saved to.
args <- R.utils::commandArgs(
  trailingOnly = TRUE,
  asValues = TRUE
)

# Fail fast with a clear message if a required path is missing, rather than
# erroring inside readRDS()/saveRDS() (the latter only after all the work below
# has already been done). `[[` is used so argument names must match exactly.
required_args <- c("raw_data", "data")
has_path <- vapply(
  required_args,
  function(arg_name) {
    is.character(args[[arg_name]]) && length(args[[arg_name]]) == 1L
  },
  logical(1)
)
if (!all(has_path)) {
  stop(
    "Missing required argument(s): ",
    paste0("--", required_args[!has_path], collapse = ", "),
    call. = FALSE
  )
}

suppressMessages({
  library(TENxPBMCData)
  library(scater)
  library(Seurat)
  library(scran)
})

# Fix the RNG seed so any stochastic steps below are reproducible.
set.seed(3112022)

# Extensive processing of this data occurs in the download_data file, perhaps
# it should be moved here.
citeseq <- readRDS(args[["raw_data"]])

# Replace the row data with a single `value` column holding the row names.
rowData(citeseq) <- DataFrame(value = rownames(citeseq))

# Filter to the top 2000 genes, as ranked by getTopHVGs() on the Poisson-based
# variance model.
dec_citeseq <- modelGeneVarByPoisson(citeseq)
top_citeseq <- getTopHVGs(dec_citeseq, n = 2000)
citeseq <- citeseq[top_citeseq, ]

# All cluster labels (not annotated): use the `RNA_snn_res.0.2` column as the
# cell labels.
colLabels(citeseq) <- factor(citeseq$RNA_snn_res.0.2)

saveRDS(citeseq, args[["data"]])
......@@ -10,6 +10,7 @@ all_data_ids:
- "paul"
- "zhao"
- "ss3_pbmc"
- "citeseq"
general_sim_data_ids: ["zeisel", "pbmc3k", "lawlor", "paul"]
time_sim_data_ids: ["pbmc3k"]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment