Skip to content
Snippets Groups Projects
Commit 449c56bf authored by Ruqian Lyu's avatar Ruqian Lyu
Browse files

add run scalability script

parent c56b08df
No related branches found
No related tags found
No related merge requests found
Pipeline #9564 passed
## Snakemake file for scalability test sscocaller per chromosome
## author Ruqian Lyu
## date 2/Jan/2022
rule all:
input:
expand(["output/scalability/chr1_pos_{vcount}.txt","output/scalability/chr1.pos.{vcount}.vcf.gz"], vcount=["0.9","0.8","0.7","0.6","0.5","0.4","0.3","0.2","0.1","0.05","0.01"]),
expand(["output/scalability/sgcocaller/crossover/varVariants/{vcount}/v{vcount}_chr1_vi.mtx","output/scalability/sgcocaller/swphase/varVariants/{vcount}/v{vcount}_chr1_corrected_phased_snpAnnot.txt","output/scalability/sgcocaller/xo_crossover/varVariants/{vcount}/v{vcount}_chr1_vi.mtx"], vcount=["0.9","0.8","0.7","0.6","0.5","0.4","0.3","0.2","0.1","0.05","0.01"]),
expand(["output/scalability/sgcocaller/crossover/varReads/{percent}/r{percent}_chr1_vi.mtx","output/scalability/sgcocaller/xo_crossover/varReads/{percent}/r{percent}_chr1_vi.mtx"], percent=["0.9","0.8","0.7","0.6","0.5","0.4","0.3","0.2","0.1","0.05","0.01"]),
rule get_chr1_bam:
input:
wholeGenomeBam = "output/alignment/mergedBam/mergedAll.bam"
output:
chr1_bam = "output/scalability/chr1_p1.bam",
chr1_bai = "output/scalability/chr1_p1.bam.bai"
resources:
cpus = 4,
mem = 5000
shell:
"""
samtools view -@ {resources.cpus} -b -o {output.chr1_bam} {input.wholeGenomeBam} chr1
samtools index -@ {resources.cpus} {output.chr1_bam}
"""
rule subSample_bam:
input:
bam="output/scalability/chr1_p1.bam",
bai = "output/scalability/chr1_p1.bam.bai"
output:
subBam="output/scalability/chr1_p{percent}.bam",
subBai="output/scalability/chr1_p{percent}.bam.bai"
resources:
cpus=4,
mem=5000
shell:
"""
samtools view -@ {resources.cpus} -s {wildcards.percent} -b -o {output.subBam} {input.bam}
samtools index -@ {resources.cpus} {output.subBam}
"""
rule get_chr1_pos:
input:
vcf="output/splittedChrVCF/SRR8454653.mkdup.sort.rg.filter.snps.castVar.bl6.nonVar.sorted.chr1.vcf.gz"
output:
region_chr1 = "output/scalability/chr1_pos.txt"
resources:
cpus = 1,
mem = 2000
shell:
"""
bcftools query -f '%CHROM\t%POS\n' {input.vcf} > {output.region_chr1}
"""
rule subset_chr1_pos:
input:
region_chr1 = "output/scalability/chr1_pos.txt"
output:
subregion_chr1 = "output/scalability/chr1_pos_{vcount}.txt"
resources:
cpus = 1,
mem = 2000
shell:
"""
cat {input.region_chr1} | awk "BEGIN {{srand()}} !/^$/ {{ if (rand() <= "{wildcards.vcount}") print \$0}}" > {output.subregion_chr1}
"""
rule subset_vcf:
input:
vcf="output/splittedChrVCF/SRR8454653.mkdup.sort.rg.filter.snps.castVar.bl6.nonVar.sorted.chr1.vcf.gz",
region_file="output/scalability/chr1_pos_{vcount}.txt"
output:
ovcf="output/scalability/chr1.pos.{vcount}.vcf.gz"
resources:
cpus=1,
mem=1000
shell:
"""
bcftools view {input.vcf} -R {input.region_file} -Oz -o {output.ovcf}
bcftools index {output.ovcf}
"""
rule run_sgcocaller_sgphase_varReads:
input:
bam="output/scalability/chr1_p{percent}.bam",
vcf="output/scalability/chr1.pos.0.3.vcf.gz",
barcodes="output/alignment/mergedBam/mergedAll.bam.barcodes.txt",
sgcocaller="debugSwphase/src/sgcocaller.out"
output:
phased_snpAnnot = "output/scalability/sgcocaller/varReads/{percent}/r{percent}_chr1_phased_snpAnnot.txt",
gtMtxfile = "output/scalability/sgcocaller/varReads/{percent}/r{percent}_chr1_gtMtx.mtx"
resources:
cpus=10,
mem=9000
log:
"output/scalability/benchmarks/varReads/{percent}_sgphase.log.txt"
benchmark:
repeat("output/scalability/benchmarks/varReads/{percent}_sgphase.benchmark.txt", 3)
shell:
"""
{input.sgcocaller} phase --threads {resources.cpus} --barcodeTag CB --chrom chr1 --minDP 2 --maxTotalDP 350 --maxDP 10 --minSNPdepth 5 --maxDissim 0.0099 \
{input.bam} {input.vcf} {input.barcodes} output/scalability/sgcocaller/varReads/{wildcards.percent}/r{wildcards.percent}_ 1> {log}
"""
rule run_sgcocaller_sgphase_varvcount:
input:
bam="output/scalability/chr1_p0.1.bam",
vcf="output/scalability/chr1.pos.{vcount}.vcf.gz",
barcodes="output/alignment/mergedBam/mergedAll.bam.barcodes.txt",
sgcocaller="debugSwphase/src/sgcocaller.out"
output:
phased_snpAnnot = "output/scalability/sgcocaller/varVariants/{vcount}/v{vcount}_chr1_phased_snpAnnot.txt",
gtMtxfile = "output/scalability/sgcocaller/varVariants/{vcount}/v{vcount}_chr1_gtMtx.mtx"
resources:
cpus=10,
mem=8000
log:
"output/scalability/benchmarks/varVariants/{vcount}_sgphase.log.txt"
benchmark:
repeat("output/scalability/benchmarks/varVariants/{vcount}_sgphase.benchmark.txt", 3)
shell:
"""
{input.sgcocaller} phase --threads {resources.cpus} --barcodeTag CB --chrom chr1 --minDP 2 --maxTotalDP 350 --maxDP 10 --minSNPdepth 5 --maxDissim 0.0099 \
{input.bam} {input.vcf} {input.barcodes} output/scalability/sgcocaller/varVariants/{wildcards.vcount}/v{wildcards.vcount}_ 1> {log}
"""
rule run_sgcocaller_swphase_varvcount:
input:
phased_snpAnnot = "output/scalability/sgcocaller/varVariants/{vcount}/v{vcount}_chr1_phased_snpAnnot.txt",
vcf="output/scalability/chr1.pos.{vcount}.vcf.gz",
sgcocaller="debugSwphase/src/sgcocaller.out",
gtMtxfile = "output/scalability/sgcocaller/varVariants/{vcount}/v{vcount}_chr1_gtMtx.mtx"
output:
"output/scalability/sgcocaller/swphase/varVariants/{vcount}/v{vcount}_chr1_corrected_phased_snpAnnot.txt"
benchmark:
repeat("output/scalability/benchmarks/swphase/varVariants/{vcount}_swphase.benchmark.txt", 3)
resources:
cpus = 10,
mem = 8000
log:
"output/scalability/benchmarks/swphase/varVariants/{vcount}_swphase.log"
shell:
"""
{input.sgcocaller} swphase --binSize 1000 --stepSize 800 --lookBeyondSnps 10 {input.gtMtxfile} {input.phased_snpAnnot} \
{input.vcf} output/scalability/sgcocaller/swphase/varVariants/{wildcards.vcount}/v{wildcards.vcount}_chr1 1> {log}
"""
rule run_sgcocaller_swphase_varReads:
input:
phased_snpAnnot = "output/scalability/sgcocaller/varReads/{percent}/r{percent}_chr1_phased_snpAnnot.txt",
gtMtxfile = "output/scalability/sgcocaller/varReads/{percent}/r{percent}_chr1_gtMtx.mtx",
vcf="output/scalability/chr1.pos.0.3.vcf.gz",
sgcocaller="debugSwphase/src/sgcocaller.out"
output:
"output/scalability/sgcocaller/swphase/varReads/{percent}/r{percent}_chr1_corrected_phased_snpAnnot.txt"
benchmark:
repeat( "output/scalability/benchmarks/swphase/varReads/{percent}_swphase.benchmark.txt", 3)
resources:
cpus = 10,
mem = 8000
log:
"output/scalability/benchmarks/swphase/varReads/{percent}_swphase.log"
shell:
"""
{input.sgcocaller} swphase --binSize 1000 --stepSize 800 --lookBeyondSnps 10 {input.gtMtxfile} {input.phased_snpAnnot} \
{input.vcf} output/scalability/sgcocaller/swphase/varReads/{wildcards.percent}/r{wildcards.percent}_chr1 1> {log}
"""
rule run_sgcocaller_sxo_varReads:
input:
sgphase_snp_annot = "output/scalability/sgcocaller/swphase/varReads/{percent}/r{percent}_chr1_corrected_phased_snpAnnot.txt",
bcfile="output/alignment/mergedBam/mergedAll.bam.barcodes.txt",
sgcocaller="debugSwphase/src/sgcocaller.out"
resources:
cpus = 10,
mem = 8000
output:
"output/scalability/sgcocaller/crossover/varReads/{percent}/r{percent}_chr1_vi.mtx"
benchmark:
repeat("output/scalability/benchmarks/crossover/varReads/{percent}_sxo.benchmark.txt",3)
log:
"output/scalability/benchmarks/crossover/varReads/{percent}_sxo.log"
shell:
"""
{input.sgcocaller} sxo --thetaREF 0.1 --thetaALT 0.9 --cmPmb 0.1 --chrom chr1 {input.sgphase_snp_annot} \
output/scalability/sgcocaller/varReads/{wildcards.percent}/r{wildcards.percent}_ {input.bcfile} output/scalability/sgcocaller/crossover/varReads/{wildcards.percent}/r{wildcards.percent} 1>{log}
"""
rule run_sgcocaller_sxo_varVariants:
input:
sgphase_snp_annot = "output/scalability/sgcocaller/swphase/varVariants/{vcount}/v{vcount}_chr1_corrected_phased_snpAnnot.txt",
bcfile="output/alignment/mergedBam/mergedAll.bam.barcodes.txt",
sgcocaller="debugSwphase/src/sgcocaller.out"
resources:
cpus = 10,
mem = 12000
output:
"output/scalability/sgcocaller/crossover/varVariants/{vcount}/v{vcount}_chr1_vi.mtx"
benchmark:
"output/scalability/benchmarks/crossover/varVariants/{vcount}_sxo.benchmark.txt"
log:
"output/scalability/benchmarks/crossover/varVariants/{vcount}_sxo.log"
shell:
"""
{input.sgcocaller} sxo --thetaREF 0.1 --thetaALT 0.9 --cmPmb 0.1 --chrom chr1 {input.sgphase_snp_annot} \
output/scalability/sgcocaller/varVariants/{wildcards.vcount}/v{wildcards.vcount}_ {input.bcfile} output/scalability/sgcocaller/crossover/varVariants/{wildcards.vcount}/v{wildcards.vcount} 1>{log}
"""
rule run_xo_varReads:
input:
subBam="output/scalability/chr1_p{percent}.bam",
bcFile="output/alignment/mergedBam/mergedAll.bam.barcodes.txt",
vcf="output/scalability/chr1.pos.0.3.vcf.gz",
sgcocaller="debugSwphase/src/sgcocaller.out"
resources:
cpus = 4,
mem = 12000
output:
"output/scalability/sgcocaller/xo_crossover/varReads/{percent}/r{percent}_chr1_vi.mtx"
benchmark:
repeat("output/scalability/benchmarks/xo_crossover/varReads/{percent}_xo.benchmark.txt", 3)
log:
"output/scalability/benchmarks/xo_crossover/varReads/{percent}_xo.log"
shell:
"""
{input.sgcocaller} xo --threads {resources.cpus} --maxTotalDP 350 --maxDP 10 --minDP 2 --minSNPdepth 5 --thetaREF 0.1 --thetaALT 0.9 --cmPmb 0.1 --chrom chr1 \
{input.subBam} {input.vcf} {input.bcFile} output/scalability/sgcocaller/xo_crossover/varReads/{wildcards.percent}/r{wildcards.percent} 1> {log}
"""
rule run_xo_varVariants:
input:
subBam="output/scalability/chr1_p0.1.bam",
bcFile="output/alignment/mergedBam/mergedAll.bam.barcodes.txt",
vcf="output/scalability/chr1.pos.{vcount}.vcf.gz",
sgcocaller="debugSwphase/src/sgcocaller.out"
resources:
cpus = 4,
mem = 12000
output:
"output/scalability/sgcocaller/xo_crossover/varVariants/{vcount}/v{vcount}_chr1_vi.mtx"
benchmark:
repeat("output/scalability/benchmarks/xo_crossover/varVariants/{vcount}_xo.benchmark.txt", 3)
log:
"output/scalability/benchmarks/xo_crossover/varVariants/{vcount}_xo.log"
shell:
"""
{input.sgcocaller} xo --threads {resources.cpus} --maxTotalDP 350 --maxDP 10 --minSNPdepth 5 --thetaREF 0.1 --thetaALT 0.9 --cmPmb 0.1 --chrom chr1 \
{input.subBam} {input.vcf} {input.bcFile} output/scalability/sgcocaller/xo_crossover/varVariants/{wildcards.vcount}/v{wildcards.vcount} 1> {log}
"""
#!/bin/bash
#SBATCH --job-name=runRDPhase
#SBATCH --ntasks=1
#SBATCH --time=324:10:00
#SBATCH --mem=1000
#SBATCH --partition=primary
/mnt/mcfiles/rlyu/Software/miniconda3.4.7/envs/rlyubase/bin/snakemake -s run_phase_reducedSNPs.snk --keep-going --rerun-incomplete -j 8 -c "sbatch --partition=primary --mem {resources.mem} --time=120:00:00 --cpus-per-task={resources.cpus} --error=reduceSNPsRunPhase.log.out"
#!/bin/bash
#SBATCH --job-name=ScalaDia
#SBATCH --ntasks=1
#SBATCH --time=324:10:00
#SBATCH --mem=1000
#SBATCH --partition=primary
/mnt/mcfiles/rlyu/Software/miniconda3.4.7/envs/rlyubase/bin/snakemake -s run_scalability.snk --rerun-incomplete -j 8 -c "sbatch --partition=primary --mem {resources.mem} --time=120:00:00 --cpus-per-task={resources.cpus} --error=Snakefile_scale.log.out"
#!/bin/bash
#SBATCH --job-name=hinchXO
#SBATCH --ntasks=1
#SBATCH --time=324:10:00
#SBATCH --mem=1000
#SBATCH --partition=primary
/mnt/mcfiles/rlyu/Software/miniconda3.4.7/envs/rlyubase/bin/snakemake -s run_sscocaller.snk --rerun-incomplete --use-singularity --singularity-args "-B /mnt/mcscratch/:/mnt/mcscratch/,/mnt/beegfs/:/mnt/beegfs/" -j 8 -c "sbatch --partition=primary --mem {resources.mem} --time=120:00:00 --cpus-per-task={resources.cpus} --error=Snakefile_sgcocaller.log.out"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment