diff --git a/Dockerfile b/Dockerfile index 5daf4ed8f52054eda978ecd8aa2fe9fbc207c8ad..bac78a1291055a9e522af6daf0140f2dac99d986 100644 --- a/Dockerfile +++ b/Dockerfile @@ -170,17 +170,19 @@ RUN sudo apt-get install -yq --no-install-recommends libudunits2-0 libudunits2-d RUN Rscript -e 'devtools::install_github(c("cole-trapnell-lab/leidenbase"))' -RUN Rscript -e 'devtools::install_github(c("cole-trapnell-lab/monocle3"))' RUN Rscript -e 'devtools::install_github(c("ChristophH/sctransform"))' # install github packages -RUN Rscript -e 'devtools::install_github(c("immunogenomics/harmony", "LTLA/beachmat", "MarioniLab/DropletUtils", "tallulandrews/M3Drop", "hemberg-lab/scRNA.seq.funcs"))' RUN Rscript -e 'install.packages(c("foreach"))' RUN Rscript -e 'install.packages(c("iterators"))' RUN Rscript -e 'install.packages(c("rsample"))' RUN Rscript -e 'install.packages(c("Rcpp"))' RUN Rscript -e 'install.packages(c("rstan"))' +RUN Rscript -e 'install.packages(c("gam"))' +RUN Rscript -e 'devtools::install_github(c("cole-trapnell-lab/monocle3"))' +RUN Rscript -e 'devtools::install_github(c("immunogenomics/harmony", "LTLA/beachmat", "MarioniLab/DropletUtils", "tallulandrews/M3Drop", "hemberg-lab/scRNA.seq.funcs"))' + # install github packages RUN Rscript -e 'devtools::install_github(c("Vivianstats/scImpute", "theislab/kBET", "kieranrcampbell/ouija", "hemberg-lab/scfind"))' @@ -191,6 +193,19 @@ COPY ./poststart.sh /home/jovyan # add course files COPY course_files /home/jovyan +COPY case_study_data/case_study.Rmd /home/jovyan/ +COPY case_study_data/pre_processing_fq.Rmd /home/jovyan/ + +COPY mig-sc-workshop-2019-data.tar.gz /home/jovyan/data/ +# cp data/droplet_id_example_per_barcode.txt.gz /home/jovyan/data/ && \ +# cp data/pancreas -r /home/jovyan/data/ && \ +# cp data/tung -r /home/jovyan/data/ && \ +# cp data/2000_reference.transcripts.fa /home/jovyan/data/ && \ +# cp data/droplet_id_example_truth.gz /home/jovyan/data/ && \ +# cp data/deng -r /home/jovyan/data/ && \ +# cp data/EXAMPLE.cram /home/jovyan/data/ && \ +# cp data/sce -r /home/jovyan/data/ + RUN chmod -R 777 /home/jovyan USER $NB_UID diff --git a/course_files/book.bib b/course_files/book.bib index 0af21371c40031f925914340561c1d448b7e75c0..e37e01a4507a3d03465b938c100c3e135a73e212 100644 --- a/course_files/book.bib +++ b/course_files/book.bib @@ -1127,3 +1127,43 @@ doi = {10.18637/jss.v059.i10} language = "en", doi = "10.1101/574574" } + + +@ARTICLE{Bais2019-hf, + title = "scds: Computational Annotation of Doublets in {Single-Cell} {RNA} + Sequencing Data", + author = "Bais, Abha S and Kostka, Dennis", + abstract = "MOTIVATION: Single-cell RNA sequencing (scRNA-seq) technologies + enable the study of transcriptional heterogeneity at the + resolution of individual cells and have an increasing impact on + biomedical research. However, it is known that these methods + sometimes wrongly consider two or more cells as single cells, and + that a number of so-called doublets is present in the output of + such experiments. Treating doublets as single cells in downstream + analyses can severely bias a study's conclusions, and therefore + computational strategies for the identification of doublets are + needed. RESULTS: With scds, we propose two new approaches for in + silico doublet identification: Co-expression based doublet + scoring (cxds) and binary classification based doublet scoring + (bcds). The co-expression based approach, cxds, utilizes + binarized (absence/presence) gene expression data and, employing + a binomial model for the co-expression of pairs of genes, yields + interpretable doublet annotations. bcds, on the other hand, uses + a binary classification approach to discriminate artificial + doublets from original data. We apply our methods and existing + computational doublet identification approaches to four data sets + with experimental doublet annotations and find that our methods + perform at least as well as the state of the art, at comparably + little computational cost. We observe appreciable differences + between methods and across data sets and that no approach + dominates all others. In summary, scds presents a scalable, + competitive approach that allows for doublet annotation of data + sets with thousands of cells in a matter of seconds. + AVAILABILITY: scds is implemented as a Bioconductor R package + (doi: 10.18129/B9.bioc.scds). SUPPLEMENTARY INFORMATION: + Supplementary data are available at Bioinformatics online.", + journal = "Bioinformatics", + month = sep, + year = 2019, + language = "en" +}