From 9ba5352da8cc25905bffbe0f692f629271364696 Mon Sep 17 00:00:00 2001 From: rlyu <rlyu@svi.edu.au> Date: Mon, 14 Oct 2019 15:51:30 +1100 Subject: [PATCH] docker file update --- Dockerfile | 9 ++++++--- case_study_data/case_study.Rmd | 23 ++++++++++++++++------- course_files/exprs-qc.Rmd | 5 +++++ course_files/pseudotime.Rmd | 2 +- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 093cf04..8574149 100644 --- a/Dockerfile +++ b/Dockerfile @@ -194,10 +194,13 @@ COPY ./poststart.sh /home/jovyan # add course files COPY course_files /home/jovyan -COPY case_study_data/case_study.Rmd /home/jovyan/ -COPY case_study_data/pre_processing_fq.Rmd /home/jovyan/ +COPY case_study_data /home/jovyan/case_study_data +RUN ls -la /home/jovyan/ -COPY mig-sc-workshop-2019-data.tar.gz /home/jovyan/data/ +COPY mig_2019_scrnaseq-workshop-data.tar.gz /home/jovyan +RUN tar -xzvf mig_2019_scrnaseq-workshop-data.tar.gz -C /home/jovyan/data/ && rm mig_2019_scrnaseq-workshop-data.tar.gz +RUN ls -la /home/jovyan/data/ +#COPY mig-sc-workshop-2019-data.tar.gz /home/jovyan/data/ # cp data/droplet_id_example_per_barcode.txt.gz /home/jovyan/data/ && \ # cp data/pancreas -r /home/jovyan/data/ && \ # cp data/tung -r /home/jovyan/data/ && \ diff --git a/case_study_data/case_study.Rmd b/case_study_data/case_study.Rmd index 127db61..e7ba6e8 100644 --- a/case_study_data/case_study.Rmd +++ b/case_study_data/case_study.Rmd @@ -29,7 +29,7 @@ mkdir LD_cr_counts cd LD_cr_counts wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612832_LD_genes.tsv.gz wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612832_LD_barcodes.tsv.gz -wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612831_LD_matrix.mtx.gz +wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612832_LD_matrix.mtx.gz cd .. mkdir Ctrl_cr_counts @@ -52,9 +52,9 @@ library(Matrix) ### Read in control cells -cellbarcodes <- read.table("../case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_barcodes.tsv.gz",stringsAsFactors = FALSE) -genenames <- read.table("../case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_genes.tsv.gz",stringsAsFactors = FALSE) -molecules <- readMM("../case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_matrix.mtx.gz") +cellbarcodes <- read.table("./case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_barcodes.tsv.gz",stringsAsFactors = FALSE) +genenames <- read.table("./case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_genes.tsv.gz",stringsAsFactors = FALSE) +molecules <- readMM("./case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_matrix.mtx.gz") head(cellbarcodes) @@ -74,9 +74,9 @@ sce_ctrl <- SingleCellExperiment( sce_ctrl ## Read in LD cells -cellbarcodes <- read.table("../case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_barcodes.tsv.gz",stringsAsFactors = FALSE) -genenames <- read.table("../case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_genes.tsv.gz",stringsAsFactors = FALSE) -molecules <- readMM("../case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_matrix.mtx.gz") +cellbarcodes <- read.table("./case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_barcodes.tsv.gz",stringsAsFactors = FALSE) +genenames <- read.table("./case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_genes.tsv.gz",stringsAsFactors = FALSE) +molecules <- readMM("./case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_matrix.mtx.gz") head(cellbarcodes) @@ -246,6 +246,15 @@ plot(sce_cr$total_features_by_counts, sce_cr$pct_counts_Mito, sce_cr <- sce_cr[, keep] table(sce_cr$Sample) +# relations of QC metrics respect to each other ( in rough aggreement) +par(mfrow=c(1,2)) +plot(sce_cr$total_features_by_counts, sce_cr$total_counts/1e6, + xlab="Number of expressed genes", + ylab="Library size (millions)") +plot(sce_cr$total_features_by_counts, sce_cr$pct_counts_Mito, + xlab="Number of expressed genes", + ylab="Mitochondrial proportion (%)") + ``` ### Doublets diff --git a/course_files/exprs-qc.Rmd b/course_files/exprs-qc.Rmd index de859af..99baa93 100644 --- a/course_files/exprs-qc.Rmd +++ b/course_files/exprs-qc.Rmd @@ -113,6 +113,8 @@ umi <- calculateQCMetrics( MT = isSpike(umi, "MT") ) ) +umi +colData(umi) ``` @@ -218,6 +220,8 @@ filter_by_ERCC <- umi$batch != "NA19098.r2" table(filter_by_ERCC) filter_by_MT <- umi$pct_counts_MT < 10 table(filter_by_MT) +filter_by_total_counts <- umi$total_counts > 25000 +#filter_by_expr_features <- umi$total_features_by_counts <7000 ``` __Exercise 4__ @@ -247,6 +251,7 @@ umi$use <- ( ) ``` + ```{r} table(umi$use) ``` diff --git a/course_files/pseudotime.Rmd b/course_files/pseudotime.Rmd index 1ba09d4..37ae1f7 100644 --- a/course_files/pseudotime.Rmd +++ b/course_files/pseudotime.Rmd @@ -319,7 +319,7 @@ There are different ways of implementing the RGE framework, `Monocle 2` uses `DD DDRTree returns a principal tree of the centroids of cell clusters in low dimension, pseudotime is derived for individual cells by calculating geomdestic distance of their projections onto the tree from the root (user-defined or arbitrarily assigned). -__Note__ Informally, a principal graph is like a principal curve which passes through the ‘middle’ of a data set but is +_Note_ Informally, a principal graph is like a principal curve which passes through the ‘middle’ of a data set but is allowed to have branches. ```{r monocle2-all-genes, message=FALSE, warning=FALSE,include=TRUE} -- GitLab