Commit 6303b48f authored by Davis McCarthy's avatar Davis McCarthy
Browse files

Updating workflow and adding dockerfile

parent 6b6d34fd
# Image bundling the command-line tools (conda environment) and the R packages
# used by the EMBO Single-cell Omics sc-methylation analysis workflow.
# NOTE(review): the base image is untagged, so each build tracks whatever
# rocker/verse currently resolves to; pin a version tag for reproducible builds.
FROM rocker/verse
LABEL authors="dmccarthy@svi.edu.au" \
      maintainer="Davis McCarthy <dmccarthy@svi.edu.au>" \
      description="Docker image containing all requirements for EMBO Single-cell Omics sc-methylation analysis"

# System packages; the apt lists are removed in the same layer so they do not
# persist in the image.
RUN apt-get update && \
    apt-get -y upgrade && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libbz2-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# The Miniconda installer is expected to be present in the build context.
COPY Miniconda3-latest-Linux-x86_64.sh /
RUN bash /Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda
ENV PATH=/miniconda:/miniconda/bin:$PATH

# `conda env create` takes its package list from the YAML file, so a Python
# pin belongs in environment.yml rather than on this command line.
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a -y

# Miniconda was installed with `-p /miniconda`, so the environment created
# above lives under /miniconda/envs (not /opt/conda/envs).
ENV PATH=/miniconda/envs/embo-singlecellomics/bin:$PATH

#COPY methylQA /methylQA
# go to the methylQA folder
#RUN cd /methylQA/
# then run a make command here
#RUN make
# the binary methylQA file will be generated
#ENV PATH /methylQA:/methylQA/bin:$PATH

# Install the R packages listed in scripts/install.R.
RUN mkdir -p /usr/local/lib/R/site-library
COPY scripts/install.R /tmp/
RUN R -f /tmp/install.R
......@@ -5,7 +5,7 @@ single-cell bisulfite sequencing data. We will assay two cell types, probably 16
cells in total.
## Goals
Two main goals:
We will process raw single-cell methylation data and conduct analyses to demonstrate the following two points:
1. Methylation profiles define cell type (i.e. cells will cluster apart by e.g. PCA)
2. Context specificity of methylation variance. E.g. in mouse ES cells, CGIs are homogeneous (and low in methylation), repeat elements are homogeneously high and active enhancer elements are heterogeneous. This is interesting because the enhancer elements are cell-type specific, and thus some variation in the methylation levels here implies plasticity in cell identity, which could be important for lineage formation.
......@@ -15,7 +15,7 @@ Clone or download this repository so that you have the necessary code, data and
If you're familiar with `git`:
```
git clone https://github.com/davismcc/SingleCellOmics_Heidelberg_Apr2017.git
git clone https://gitlab.svi.edu.au/biocellgen-public/embo-singlecellomics-methylation_2019-05_heidelberg.git
```
If not, you can download a zip file of the repository by clicking the green "Clone or download" button above.
......@@ -78,7 +78,7 @@ in case you wish to use it for the second part of the analysis.
## Acknowledgements
Many thanks to Stephen Clark and Ricard Argeluet for help and advice. Stephen
Many thanks to Stephen Clark and Ricard Argelaguet for help and advice. Stephen
advised on the course aims and structure and directed generation of raw data.
Ricard provided advice on analysis and provided data processing scripts and
processed datasets for use.
......@@ -5,17 +5,17 @@ Davis McCarthy
EMBL-EBI
April 2017
Last updated: 17 May 2019
Run on a cluster with a command like:
snakemake --jobs 1000 --latency-wait 30 --cluster 'bsub -R "rusage[mem=32000]" -M 32000 -o ./snake_logs -e ./snake_logs'
"""
import glob
import os
#from subprocess import run
import pandas as pd
import re
TEST = True
TEST = False
if TEST:
SAMPLES_LONG = glob.glob('data/fastq/test/*.fastq.gz')
......@@ -32,6 +32,8 @@ else:
fastqc_html_reports = expand('reports/fastqc/{sample}_fastqc.html', sample = SAMPLES_LONG)
print(SAMPLES_LONG)
print(SAMPLES_MERGE)
rule all:
input:
......@@ -42,19 +44,21 @@ rule all:
rule fastqc_reports:
input:
'data/fastq/test/{sample}.fastq.gz'
'data/fastq/{sample}.fastq.gz'
## if test = False, remove test/ from path above
output:
'reports/fastqc/{sample}_fastqc.html'
params:
output_dir="reports/fastqc/"
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'/Users/davis/src/FastQC.app/Contents/MacOS/fastqc -o {params.output_dir} {input}'
rule trim_fastq:
input:
'data/fastq/test/{sample}.fastq.gz'
'data/fastq/{sample}.fastq.gz'
## if test = False, remove test/ from path above
output:
temp('{sample}_trimmed.fq.gz'),
......@@ -73,6 +77,8 @@ rule fastqc_reports_trimmed:
temp('reports/fastqc/{sample}_trimmed_fastqc.html')
params:
output_dir="reports/fastqc/"
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'/Users/davis/src/FastQC.app/Contents/MacOS/fastqc -o {params.output_dir} {input}'
......@@ -82,6 +88,8 @@ rule bismark_prepare_genome:
'genome'
output:
'genome/Bisulfite_Genome'
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'bismark_genome_preparation {input}'
......@@ -92,6 +100,8 @@ rule bismark:
'genome/Bisulfite_Genome'
output:
temp('data/bismark/raw/{sample}_trimmed_bismark_bt2.bam') ## CHECK BISMARK OUTPUT
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'bismark --non_directional --genome genome -o data/bismark/raw '
'{input}'
......@@ -102,6 +112,8 @@ rule bismark_dedup:
'data/bismark/raw/{sample}_trimmed_bismark_bt2.bam'
output:
temp('data/bismark/raw/{sample}_trimmed_bismark_bt2.deduplicated.bam') ## CHECK BISMARK OUTPUT
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'deduplicate_bismark --bam {input}'
......@@ -111,6 +123,8 @@ rule bismark_methylation:
'data/bismark/raw/{sample}_trimmed_bismark_bt2.deduplicated.bam'
output:
'data/bismark/methyl/{sample}_trimmed_bismark_bt2.deduplicated.bismark.cov.gz'
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'bismark_methylation_extractor --gzip --bedGraph '
'-o data/bismark/methyl {input}'
......@@ -124,6 +138,8 @@ rule multiqc:
expand('{sample}.fastq.gz_trimming_report.txt', sample = SAMPLES)
output:
'reports/multiqc/multiqc_report.html'
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'multiqc --force --filename {output} '
'reports/fastqc ./ '
......@@ -139,6 +155,8 @@ rule merge_methylation:
params:
indir = 'data/bismark/methyl',
outdir = 'data/bismark/merged'
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'RScript scripts/merge.R -i {params.indir} -o {params.outdir}'
......@@ -152,5 +170,7 @@ rule annotate_methylation:
indir = 'data/bismark/merged',
annodir = 'annotation',
outdir = 'results'
singularity:
"docker://dmccarthy/embo-singlecellomics-methylation_2019-05_heidelberg:0.1"
shell:
'RScript scripts/annotate.R -i {params.indir} -a {params.annodir} -o {params.outdir}'
# Conda environment providing the command-line tools used by the workflow.
name: embo-singlecellomics
# Channels are listed from highest to lowest priority. Bioconda packages are
# built against conda-forge, so Bioconda's documented order is
# conda-forge > bioconda > defaults.
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - conda-forge::openjdk=8.0.144 # Needed for FastQC docker - see bioconda/bioconda-recipes#5026
  - fastqc=0.11.7
  - multiqc=1.5
  - cutadapt=1.15
  - trim-galore=0.4.5
  - samtools=1.8
  - tabix=0.2.5
  - bowtie2=2.3.5
  - bismark=0.22.1
  - snakemake=5.4.5
## install script for R pkgs
##
## Installs the CRAN/Bioconductor packages listed in `pkgs` that are not
## already present. Quits with status 1 if a needed package is unavailable
## or fails to install, so that a non-interactive build fails loudly instead
## of silently producing an incomplete library.

## Only CRAN is configured here: BiocManager::repositories() adds the
## Bioconductor repositories itself. The previous "BioC" entry pointed at the
## Bioconductor website root, which is not expected to serve a package index.
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("BiocManager")

pkgs <- c(
    "data.table",
    "docopt",
    "ggthemes",
    "ggforce",
    "GenomicRanges",
    "irlba",
    "iSEE",
    "MultiAssayExperiment",
    "org.Hs.eg.db",
    "org.Mm.eg.db",
    "pcaMethods",
    "RCurl",
    "Rtsne",
    "scater",
    "scran",
    "SC3",
    "slalom",
    "variancePartition"
)

## do not reinstall packages that are already installed in the image
needed <- setdiff(pkgs, rownames(installed.packages()))

## A needed package missing from every configured repository is an error:
## dropping it silently would defeat the failure check at the end of this
## script.
available <- rownames(available.packages(contrib.url(BiocManager::repositories())))
unavailable <- setdiff(needed, available)
if (length(unavailable) > 0L) {
    message("Packages not available from the configured repositories: ",
            paste(unavailable, collapse = ", "))
    quit("no", 1L)
}

## update/ask are set explicitly so the run never waits for a prompt and
## does not touch packages outside `needed`.
if (length(needed) > 0L) {
    BiocManager::install(needed, update = FALSE, ask = FALSE)
}

## just in case there were warnings, we want to see them
## without having to scroll up:
warnings()
if (!is.null(warnings()))
{
    w <- capture.output(warnings())
    if (length(grep("is not available|had non-zero exit status", w)))
        quit("no", 1L)
}

## Final check that does not depend on the wording of warning messages:
## every requested package must now be installed.
not_installed <- setdiff(pkgs, rownames(installed.packages()))
if (length(not_installed) > 0L) {
    message("Packages that failed to install: ",
            paste(not_installed, collapse = ", "))
    quit("no", 1L)
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment