From 9ba5352da8cc25905bffbe0f692f629271364696 Mon Sep 17 00:00:00 2001
From: rlyu <rlyu@svi.edu.au>
Date: Mon, 14 Oct 2019 15:51:30 +1100
Subject: [PATCH] docker file update

---
 Dockerfile                     |  9 ++++++---
 case_study_data/case_study.Rmd | 23 ++++++++++++++++-------
 course_files/exprs-qc.Rmd      |  5 +++++
 course_files/pseudotime.Rmd    |  2 +-
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 093cf04..8574149 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -194,10 +194,13 @@ COPY ./poststart.sh /home/jovyan
 
 # add course files
 COPY course_files /home/jovyan
-COPY case_study_data/case_study.Rmd /home/jovyan/
-COPY case_study_data/pre_processing_fq.Rmd /home/jovyan/
+COPY case_study_data /home/jovyan/case_study_data
+RUN ls -la /home/jovyan/
 
-COPY mig-sc-workshop-2019-data.tar.gz  /home/jovyan/data/
+COPY mig_2019_scrnaseq-workshop-data.tar.gz /home/jovyan
+RUN tar -xzvf mig_2019_scrnaseq-workshop-data.tar.gz -C /home/jovyan/data/ && rm mig_2019_scrnaseq-workshop-data.tar.gz
+RUN ls -la /home/jovyan/data/
+#COPY mig-sc-workshop-2019-data.tar.gz  /home/jovyan/data/
 # cp data/droplet_id_example_per_barcode.txt.gz  /home/jovyan/data/ && \
 #     cp data/pancreas -r  /home/jovyan/data/ && \
 #     cp data/tung -r /home/jovyan/data/ && \
diff --git a/case_study_data/case_study.Rmd b/case_study_data/case_study.Rmd
index 127db61..e7ba6e8 100644
--- a/case_study_data/case_study.Rmd
+++ b/case_study_data/case_study.Rmd
@@ -29,7 +29,7 @@ mkdir LD_cr_counts
 cd LD_cr_counts
 wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612832_LD_genes.tsv.gz
 wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612832_LD_barcodes.tsv.gz
-wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612831_LD_matrix.mtx.gz
+wget ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM3612nnn/GSM3612832/suppl/GSM3612832_LD_matrix.mtx.gz
 
 cd ..
 mkdir Ctrl_cr_counts
@@ -52,9 +52,9 @@ library(Matrix)
 
 ### Read in control cells 
 
-cellbarcodes <- read.table("../case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_barcodes.tsv.gz",stringsAsFactors = FALSE)
-genenames <- read.table("../case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_genes.tsv.gz",stringsAsFactors = FALSE)
-molecules <- readMM("../case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_matrix.mtx.gz")
+cellbarcodes <- read.table("./case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_barcodes.tsv.gz",stringsAsFactors = FALSE)
+genenames <- read.table("./case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_genes.tsv.gz",stringsAsFactors = FALSE)
+molecules <- readMM("./case_study_data/retinal/GEO_downloads/Ctrl/GSM3612831_ctrl_matrix.mtx.gz")
 
 
 head(cellbarcodes)
@@ -74,9 +74,9 @@ sce_ctrl <- SingleCellExperiment(
 sce_ctrl
 
 ## Read in LD cells
-cellbarcodes <- read.table("../case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_barcodes.tsv.gz",stringsAsFactors = FALSE)
-genenames <- read.table("../case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_genes.tsv.gz",stringsAsFactors = FALSE)
-molecules <- readMM("../case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_matrix.mtx.gz")
+cellbarcodes <- read.table("./case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_barcodes.tsv.gz",stringsAsFactors = FALSE)
+genenames <- read.table("./case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_genes.tsv.gz",stringsAsFactors = FALSE)
+molecules <- readMM("./case_study_data/retinal/GEO_downloads/LD/GSM3612832_LD_matrix.mtx.gz")
 
 
 head(cellbarcodes)
@@ -246,6 +246,15 @@ plot(sce_cr$total_features_by_counts, sce_cr$pct_counts_Mito,
 
 sce_cr <- sce_cr[, keep]
 table(sce_cr$Sample)
+# Relationships between the QC metrics (they are in rough agreement)
+par(mfrow=c(1,2))
+plot(sce_cr$total_features_by_counts, sce_cr$total_counts/1e6,
+     xlab="Number of expressed genes",
+     ylab="Library size (millions)")
+plot(sce_cr$total_features_by_counts, sce_cr$pct_counts_Mito,
+     xlab="Number of expressed genes",
+     ylab="Mitochondrial proportion (%)")
+
 ```
 
 ### Doublets
diff --git a/course_files/exprs-qc.Rmd b/course_files/exprs-qc.Rmd
index de859af..99baa93 100644
--- a/course_files/exprs-qc.Rmd
+++ b/course_files/exprs-qc.Rmd
@@ -113,6 +113,8 @@ umi <- calculateQCMetrics(
         MT = isSpike(umi, "MT")
     )
 )
+umi
+colData(umi)
 ```
 
 
@@ -218,6 +220,8 @@ filter_by_ERCC <- umi$batch != "NA19098.r2"
 table(filter_by_ERCC)
 filter_by_MT <- umi$pct_counts_MT < 10
 table(filter_by_MT)
+filter_by_total_counts <- umi$total_counts > 25000
+#filter_by_expr_features <- umi$total_features_by_counts <7000
 ```
 
 __Exercise 4__
@@ -247,6 +251,7 @@ umi$use <- (
 )
 ```
 
+
 ```{r}
 table(umi$use)
 ```
diff --git a/course_files/pseudotime.Rmd b/course_files/pseudotime.Rmd
index 1ba09d4..37ae1f7 100644
--- a/course_files/pseudotime.Rmd
+++ b/course_files/pseudotime.Rmd
@@ -319,7 +319,7 @@ There are different ways of implementing the RGE framework, `Monocle 2` uses `DD
 
 DDRTree returns a principal tree of the centroids of cell clusters in low dimension, pseudotime is derived for individual cells by calculating geomdestic distance of their projections onto the tree from the root (user-defined or arbitrarily assigned).
 
-__Note__ Informally, a principal graph is like a principal curve which passes through the ‘middle’ of a data set but is
+_Note_ Informally, a principal graph is like a principal curve which passes through the ‘middle’ of a data set but is
 allowed to have branches.
 
 ```{r monocle2-all-genes, message=FALSE, warning=FALSE,include=TRUE}
-- 
GitLab