Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
sscocaller
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
BioCellGen-public
sscocaller
Commits
8624a917
Commit
8624a917
authored
3 years ago
by
Ruqian Lyu
Browse files
Options
Downloads
Patches
Plain Diff
add versioning and refactored code to have mainModule check
parent
71a7cea1
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Pipeline
#6669
passed
3 years ago
Stage: buildStatic
Stage: testStatic
Changes
1
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/sscocaller.nim
+246
-238
246 additions, 238 deletions
src/sscocaller.nim
with
246 additions
and
238 deletions
src/sscocaller.nim
+
246
−
238
View file @
8624a917
...
...
@@ -16,141 +16,127 @@ import math
from
distributions
/
rmath
import
dbinom
import
streams
#echo paramCount(), " ", paramStr(1)
type
allele_expr
=
ref
object
cref
:
int
calt
:
int
type
AlleleExpr
=
object
cRef
:
int
cAlt
:
int
# Nucleotide = enum
# A, C, T, G
ViState
=
enum
stateRef
,
stateAlt
,
stateN
ViNode
=
object
pos
:
int
cRef
:
int
cAlt
:
int
pathScore
:
array
[
stateRef
..
stateAlt
,
float
]
pathState
:
array
[
stateRef
..
stateAlt
,
ViState
]
state
:
ViState
SpermViNodes
=
object
viNodeseq
:
seq
[
ViNode
]
## look up from SNPIndex to current Sperm SNP index
snpIndexLookUp
:
Table
[
int
,
int
]
## look up from current Sperm SNP index to SNPIndex
spermSnpIndexLookUp
:
Table
[
int
,
int
]
SeqSpermViNodes
=
seq
[
SpermViNodes
]
#var alexprSeq:seq[AlleleExpr]
proc
sscocaller
(
argv
:
var
seq
[
string
]
):
int
=
let
doc
=
"""
Obtain allele counts for cell barcodes listed in barcodeFile at the SNP positions in VCF from the BAM file.
Usage:
sscocaller [options] <BAM> <VCF> <barcodeFile> <out_prefix>
Options:
-t --threads <threads> number of BAM decompression threads [default: 4]
-MQ --minMAPQ <mapq> Minimum MAPQ for read filtering [default: 20]
-BQ --baseq <baseq> base quality threshold for a base to be used for counting [default: 13]
-CHR --chrom <chrom> the selected chromsome (whole genome if not supplied,separate by comma if multiple chroms)
-minDP --minDP <minDP> the minimum DP for a SNP to be included in the output file [default: 1]
-maxDP --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 5]
-maxTotalDP --maxTotalDP <maxTotalDP> the maximum DP across all barcodes for a SNP to be included in the output file [default: 25]
-minTotalDP --minTotalDP <minTotalDP> the minimum DP across all barcodes for a SNP to be included in the output file [default: 10]
-chrName --chrName <chrName> the chr names with chr prefix or not, if not supplied then no prefix
-thetaREF --thetaREF <thetaREF> the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
-thetaALT --thetaALT <thetaALT> the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
-cmPmb --cmPmb <cmPmb> the average centiMorgan distances per megabases default 0.1 cm per Mb [default 0.1]
-h --help show help
Examples
./sscocaller --threads 10 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp-
"""
let
args
=
docopt
(
doc
,
argv
=
argv
)
var
ibam
:
Bam
threads
:
int
vcff
:
string
ivcf
:
VCF
barcodeFile
:
string
bamfile
:
string
selectedChrs
:
string
out_dir
:
string
mapq
:
int
minbsq
:
int
mintotal
:
int
maxtotal
:
int
mindp
:
int
maxdp
:
int
thetaREF
:
float
thetaALT
:
float
cmPmb
:
float
type
allele_expr
=
ref
object
cref
:
int
calt
:
int
proc
inc_count_cref
(
a
:
allele_expr
)
=
inc
(
a
.
cref
)
proc
inc_count_calt
(
a
:
allele_expr
)
=
inc
(
a
.
calt
)
proc
reset_count
(
a
:
allele_expr
)
=
a
.
calt
=
0
a
.
cref
=
0
# proc cref(a:allele_expr): int {.inline.} = return a.cref
# proc calt(a:allele_expr): int {.inline.} = return a.calt
proc
tostring
(
a
:
allele_expr
,
s
:
var
string
)
{.
inline
.}
=
s
.
set_len
(
0
)
s
.
add
(
"
\t
"
&
$
a
.
cref
&
","
&
$
a
.
calt
)
proc
inc_count_cref
(
a
:
allele_expr
)
=
inc
(
a
.
cref
)
proc
inc_count_calt
(
a
:
allele_expr
)
=
inc
(
a
.
calt
)
proc
reset_count
(
a
:
allele_expr
)
=
a
.
calt
=
0
a
.
cref
=
0
proc
tostring
(
a
:
allele_expr
,
s
:
var
string
)
{.
inline
.}
=
s
.
set_len
(
0
)
s
.
add
(
"
\t
"
&
$
a
.
cref
&
","
&
$
a
.
calt
)
proc
getEmission
(
thetaRef
=
0.1
,
thetaAlt
=
0.9
,
cRef
:
int
,
cAlt
:
int
):
array
[
stateRef
..
stateAlt
,
float
]
=
if
(
$
args
[
"--threads"
]
!=
"nil"
):
threads
=
parse_int
(
$
args
[
"--threads"
]
)
else
:
threads
=
4
if
(
$
args
[
"--minDP"
]
!=
"nil"
):
mindp
=
parse_int
(
$
args
[
"--minDP"
]
)
else
:
mindp
=
1
if
(
$
args
[
"--maxDP"
]
!=
"nil"
):
maxdp
=
parse_int
(
$
args
[
"--maxDP"
]
)
else
:
maxdp
=
10
var
emissionScore
=
[
dbinom
(
x
=
float
(
cAlt
),
size
=
(
cRef
+
cAlt
),
prob
=
thetaRef
,
log
=
true
),
dbinom
(
x
=
float
(
cAlt
),
size
=
(
cRef
+
cAlt
),
prob
=
thetaAlt
,
log
=
true
)
]
return
emissionScore
if
(
$
args
[
"--maxTotalDP"
]
!=
"nil"
):
maxtotal
=
parse_int
(
$
args
[
"--maxTotalDP"
]
)
else
:
maxtotal
=
30
if
(
$
args
[
"--minTotalDP"
]
!=
"nil"
):
mintotal
=
parse_int
(
$
args
[
"--minTotalDP"
]
)
else
:
mintotal
=
10
if
(
$
args
[
"<BAM>"
]
==
"nil"
):
quit
"input bam is required"
else
:
bamfile
=
$
args
[
"<BAM>"
]
if
(
$
args
[
"<barcodeFile>"
]
==
"nil"
):
quit
"input barcodeFile is required"
else
:
barcodeFile
=
$
args
[
"<barcodeFile>"
]
if
(
$
args
[
"<out_prefix>"
]
==
"nil"
):
quit
"output prefix is required"
else
:
out_dir
=
$
args
[
"<out_prefix>"
]
if
(
$
args
[
"<VCF>"
]
==
"nil"
):
quit
"input VCF file is required"
else
:
vcff
=
$
args
[
"<VCF>"
]
proc
getTrans
(
pos1
:
int64
,
pos2
:
int64
,
cmPmb
=
0.1
):
float
=
if
pos2
<
pos1
:
quit
"Wrong order of snps"
var
rec
=
1
-
math
.
exp
(
-
float
(
pos2
-
pos1
)
*
1e-8
*
cmPmb
)
# for autosomes 1*cmPbm cM per Mb
return
rec
#
proc
countAllele
(
rec
:
Variant
,
ibam
:
Bam
,
maxTotalReads
:
int
,
minTotalReads
:
int
,
chrom
:
string
,
mapq
:
int
,
barcodeTable
:
OrderedTableRef
,
minbsq
:
int
):
Table
[
string
,
allele_expr
]
=
var
alleleCountTable
=
initTable
[
string
,
allele_expr
]
()
var
rec_alt
:
char
var
total_reads
=
0
rec_alt
=
rec
.
ALT
[
0
][
0
]
for
aln
in
ibam
.
query
(
chrom
=
chrom
,
start
=
rec
.
POS
.
cint
-
1
,
stop
=
rec
.
POS
.
cint
):
var
cbt
=
tag
[
string
]
(
aln
,
"CB"
)
if
cbt
.
isNone
:
# echo "no cb "
continue
if
aln
.
flag
.
unmapped
or
aln
.
mapping_quality
.
cint
<
mapq
or
aln
.
flag
.
dup
:
continue
## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
## if not aln.flag.proper_pair: continue
if
(
$
args
[
"--minMAPQ"
]
!=
"nil"
):
mapq
=
parse_int
(
$
args
[
"--minMAPQ"
]
)
else
:
mapq
=
20
if
(
$
args
[
"--baseq"
]
!=
"nil"
):
minbsq
=
parse_int
(
$
args
[
"--baseq"
]
)
else
:
minbsq
=
13
if
(
$
args
[
"--thetaREF"
]
!=
"nil"
):
thetaRef
=
parse_float
(
$
args
[
"--thetaREF"
]
)
else
:
thetaRef
=
0.1
if
not
barcodeTable
.
hasKey
(
cbt
.
get
):
continue
var
off
=
aln
.
start
.
int
qoff
=
0
roff_only
=
0
base
=
'n'
position
=
rec
.
POS
.
cint
over
=
0
for
event
in
aln
.
cigar
:
var
cons
=
event
.
consumes
#echo $cons
if
cons
.
query
:
qoff
+=
event
.
len
## the offs to the query sequences
if
cons
.
reference
:
off
+=
event
.
len
## the offs to the reference sequences
if
not
cons
.
query
:
roff_only
+=
event
.
len
if
off
<=
position
:
continue
over
=
off
-
position
# get the base
base
=
aln
.
base_at
(
qoff
-
over
-
1
)
break
if
aln
.
base_quality_at
(
qoff
-
over
-
1
).
cint
<
minbsq
:
continue
total_reads
+=
1
if
alleleCountTable
.
hasKey
(
cbt
.
get
):
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec
.
REF
[
0
]
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_cref
continue
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec_alt
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_calt
continue
else
:
var
new_snp
=
allele_expr
(
cref
:
0
,
calt
:
0
)
alleleCountTable
[
cbt
.
get
]
=
new_snp
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec
.
REF
[
0
]
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_cref
continue
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec_alt
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_calt
continue
if
total_reads
>
maxTotalReads
or
total_reads
<
minTotalReads
:
return
initTable
[
string
,
allele_expr
]
()
return
alleleCountTable
proc
sscocaller
(
threads
:
int
,
vcff
:
string
,
barcodeFile
:
string
,
bamfile
:
string
,
out_dir
:
string
,
mapq
:
int
,
minbsq
:
int
,
mintotal
:
int
,
maxtotal
:
int
,
mindp
:
int
,
maxdp
:
int
,
thetaREF
:
float
,
thetaALT
:
float
,
cmPmb
:
float
,
s_Chrs
:
seq
):
int
=
if
(
$
args
[
"--thetaALT"
]
!=
"nil"
):
thetaAlt
=
parse_float
(
$
args
[
"--thetaALT"
]
)
else
:
thetaAlt
=
0.9
if
(
$
args
[
"--cmPmb"
]
!=
"nil"
):
cmPmb
=
parse_float
(
$
args
[
"--cmPmb"
]
)
else
:
cmPmb
=
0.1
echo
"min mapq "
,
mapq
,
" min base qual "
,
minbsq
if
(
$
args
[
"--chrom"
]
!=
"nil"
):
selectedChrs
=
$
args
[
"--chrom"
]
else
:
let
a
=
toSeq
(
1
..
19
)
if
(
$
args
[
"--chrName"
]
==
"nil"
):
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
""
&
$
x
)
let
all_Chrs
=
concat
(
b
,
@[
"X"
,
"Y"
]
)
selectedChrs
=
all_Chrs
.
join
(
","
)
else
:
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
"chr"
&
$
x
)
let
all_Chrs
=
concat
(
b
,
@[
"chrX"
,
"chrY"
]
)
selectedChrs
=
all_Chrs
.
join
(
","
)
let
s_Chrs
=
selectedChrs
.
split
(
','
)
echo
$
s_Chrs
var
ibam
:
Bam
ivcf
:
VCF
if
not
open
(
ibam
,
bamfile
,
threads
=
threads
,
index
=
true
):
quit
"couldn't open input bam"
if
not
open
(
ivcf
,
vcff
,
threads
=
threads
):
...
...
@@ -170,107 +156,9 @@ proc sscocaller(argv: var seq[string]): int =
if
$
kstr
.
s
[
0
]
==
"#"
:
continue
var
v
=
$
kstr
.
s
## barcodekey:spermIndex pair
discard
barcodeTable
.
hasKeyOrPut
(
v
,
ithSperm
)
ithSperm
+=
1
echo
"fetch given SNPs in "
,
barcodeTable
.
len
,
" single cells"
type
AlleleExpr
=
object
cRef
:
int
cAlt
:
int
# Nucleotide = enum
# A, C, T, G
ViState
=
enum
stateRef
,
stateAlt
,
stateN
ViNode
=
object
pos
:
int
cRef
:
int
cAlt
:
int
pathScore
:
array
[
stateRef
..
stateAlt
,
float
]
pathState
:
array
[
stateRef
..
stateAlt
,
ViState
]
state
:
ViState
SpermViNodes
=
object
viNodeseq
:
seq
[
ViNode
]
## look up from SNPIndex to current Sperm SNP index
snpIndexLookUp
:
Table
[
int
,
int
]
## look up from current Sperm SNP index to SNPIndex
spermSnpIndexLookUp
:
Table
[
int
,
int
]
ithSperm
+=
1
let
initProb
:
array
[
stateRef
..
stateAlt
,
float
]=[
0.5
,
0.5
]
var
alexprSeq
:
seq
[
AlleleExpr
]
proc
getEmission
(
thetaRef
=
0.1
,
thetaAlt
=
0.9
,
cRef
:
int
,
cAlt
:
int
):
array
[
stateRef
..
stateAlt
,
float
]
=
var
emissionScore
=
[
dbinom
(
x
=
float
(
cAlt
),
size
=
(
cRef
+
cAlt
),
prob
=
thetaRef
,
log
=
true
),
dbinom
(
x
=
float
(
cAlt
),
size
=
(
cRef
+
cAlt
),
prob
=
thetaAlt
,
log
=
true
)
]
return
emissionScore
#echo getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,cRef=10,cAlt=0)
proc
getTrans
(
pos1
:
int64
,
pos2
:
int64
,
cmPmb
=
0.1
):
float
=
if
pos2
<
pos1
:
quit
"Wrong order of snps"
var
rec
=
1
-
math
.
exp
(
-
float
(
pos2
-
pos1
)
*
1e-8
*
cmPmb
)
# for autosomes 1*cmPbm cM per Mb
return
rec
#
proc
countAllele
(
rec
:
Variant
,
ibam
:
Bam
,
maxTotalReads
:
int
,
minTotalReads
:
int
,
chrom
:
string
,
mapq
:
int
,
barcodeTable
:
OrderedTableRef
,
minbsq
:
int
):
Table
[
string
,
allele_expr
]
=
var
alleleCountTable
=
initTable
[
string
,
allele_expr
]
()
var
rec_alt
:
char
var
total_reads
=
0
rec_alt
=
rec
.
ALT
[
0
][
0
]
for
aln
in
ibam
.
query
(
chrom
=
chrom
,
start
=
rec
.
POS
.
cint
-
1
,
stop
=
rec
.
POS
.
cint
):
if
aln
.
flag
.
unmapped
or
aln
.
mapping_quality
.
cint
<
mapq
or
aln
.
flag
.
dup
:
continue
## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
## if not aln.flag.proper_pair: continue
var
cbt
=
tag
[
string
]
(
aln
,
"CB"
)
if
cbt
.
isNone
:
continue
if
not
barcodeTable
.
hasKey
(
cbt
.
get
):
continue
var
off
=
aln
.
start
.
int
qoff
=
0
roff_only
=
0
base
=
'n'
position
=
rec
.
POS
.
cint
over
=
0
for
event
in
aln
.
cigar
:
var
cons
=
event
.
consumes
if
cons
.
query
:
qoff
+=
event
.
len
## the offs to the query sequences
if
cons
.
reference
:
off
+=
event
.
len
## the offs to the reference sequences
# if not cons.query:
# roff_only += event.len
if
off
<=
position
:
continue
over
=
off
-
position
# get the base
#base = aln.base_at(qoff - over-1)
break
if
aln
.
base_quality_at
(
qoff
-
over
-
1
).
cint
<
minbsq
:
continue
total_reads
+=
1
if
alleleCountTable
.
hasKey
(
cbt
.
get
):
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec
.
REF
[
0
]
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_cref
continue
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec_alt
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_calt
continue
else
:
var
new_snp
=
allele_expr
(
cref
:
0
,
calt
:
0
)
alleleCountTable
[
cbt
.
get
]
=
new_snp
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec
.
REF
[
0
]
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_cref
continue
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec_alt
:
alleleCountTable
[
cbt
.
get
]
.
inc_count_calt
continue
if
total_reads
>
maxTotalReads
or
total_reads
<
minTotalReads
:
return
initTable
[
string
,
allele_expr
]
()
return
alleleCountTable
## iterate through each selected chromosome
for
chrom
in
s_Chrs
:
# var scTable = initTable[string,SingleSpermRec]()
...
...
@@ -278,8 +166,7 @@ proc sscocaller(argv: var seq[string]): int =
var
spermIndex
=
0
## number of non zeros
var
nnsize
=
0
# var snpAnnoSeq:seq[SNPAnnot]
var
scSpermSeq
:
seq
[
SpermViNodes
]
var
scSpermSeq
:
SeqSpermViNodes
## matches with the order in barcodeTable
scSpermSeq
.
setLen
(
barcodeTable
.
len
)
var
outFileSNPanno
:
FileStream
...
...
@@ -338,7 +225,6 @@ proc sscocaller(argv: var seq[string]): int =
nnsize
+=
1
var
emissionArray
=
getEmission
(
thetaRef
=
thetaRef
,
thetaAlt
=
thetaAlt
,
cRef
=
ac
.
cRef
,
cAlt
=
ac
.
cAlt
)
if
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
==
0
:
echo
"first node to "
,
ithSperm
var
currentViNode
=
ViNode
()
currentViNode
.
pathScore
[
stateRef
]
=
math
.
ln
(
initProb
[
stateRef
]
)
+
emissionArray
[
stateRef
]
currentViNode
.
pathScore
[
stateAlt
]
=
math
.
ln
(
initProb
[
stateAlt
]
)
+
emissionArray
[
stateAlt
]
...
...
@@ -353,7 +239,6 @@ proc sscocaller(argv: var seq[string]): int =
scSpermSeq
[
ithSperm
]
.
snpIndexLookUp
[
snpIndex
]
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
scSpermSeq
[
ithSperm
]
.
spermSnpIndexLookUp
[
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
]
=
snpIndex
else
:
#echo "additional node to a cell"
let
preVNode
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
)
]
var
ltransProb
=
math
.
ln
(
getTrans
(
preVNode
.
pos
,
int
(
rec
.
POS
),
cmPmb
=
cmPmb
))
var
lnoTransProb
=
math
.
ln
(
1
-
getTrans
(
preVNode
.
pos
,
int
(
rec
.
POS
),
cmPmb
=
cmPmb
))
...
...
@@ -430,8 +315,6 @@ proc sscocaller(argv: var seq[string]): int =
transProb
=
getTrans
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
pos
,
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
)
]
.
pos
,
cmPmb
=
cmPmb
)
# echo inferProb
# echo reverseProb
if
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
state
==
stateRef
:
outFileVStateMtx
.
writeLine
(
$
ithSNP
&
" "
&
$
(
ithSperm
+
1
)
&
" 1"
)
if
state
==
stateRef
:
...
...
@@ -448,8 +331,6 @@ proc sscocaller(argv: var seq[string]): int =
inferProb
+=
leftGap
[
0
]
reverseProb
+=
leftGap
[
1
]
viSegmentInfo
.
writeLine
(
"ithSperm"
&
$
ithSperm
&
" "
&
$
posStart
&
" "
&
$
posEnd
&
" "
&
$
(
inferProb
-
reverseProb
)
&
" "
&
$
cSNP
&
" 2"
)
#echo "ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(reverseProb/inferProb) & " " & $cSNP
#echo $(inferProb/reverseProb)
transitFlag
=
true
cSNP
=
1
rightGap
=
leftGap
...
...
@@ -524,6 +405,133 @@ proc sscocaller(argv: var seq[string]): int =
ivcf
.
close
()
return
0
when
(
isMainModule
):
let
version
=
"0.1.0"
var
doc
=
format
(
"""
$version
Usage:
sscocaller [options] <BAM> <VCF> <barcodeFile> <out_prefix>
Arguments:
<BAM> the read alignment file with records of single-cell DNA reads
<VCF> the variant call file with records of SNPs
<barcodeFile> the text file containing the list of cell barcodes
<out_prefix> the prefix of output files
Options:
-t --threads <threads> number of BAM decompression threads [default: 4]
-MQ --minMAPQ <mapq> Minimum MAPQ for read filtering [default: 20]
-BQ --baseq <baseq> base quality threshold for a base to be used for counting [default: 13]
-CHR --chrom <chrom> the selected chromsome (whole genome if not supplied,separate by comma if multiple chroms)
-minDP --minDP <minDP> the minimum DP for a SNP to be included in the output file [default: 1]
-maxDP --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 5]
-maxTotalDP --maxTotalDP <maxTotalDP> the maximum DP across all barcodes for a SNP to be included in the output file [default: 25]
-minTotalDP --minTotalDP <minTotalDP> the minimum DP across all barcodes for a SNP to be included in the output file [default: 10]
-chrName --chrName <chrName> the chr names with chr prefix or not, if not supplied then no prefix
-thetaREF --thetaREF <thetaREF> the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
-thetaALT --thetaALT <thetaALT> the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
-cmPmb --cmPmb <cmPmb> the average centiMorgan distances per megabases default 0.1 cm per Mb [default 0.1]
-h --help show help
Examples
./sscocaller --threads 10 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp-
"""
%
[
"version"
,
version
]
)
let
args
=
docopt
(
doc
,
version
=
version
)
var
threads
:
int
vcff
:
string
barcodeFile
:
string
bamfile
:
string
selectedChrs
:
string
out_dir
:
string
mapq
:
int
minbsq
:
int
mintotal
:
int
maxtotal
:
int
mindp
:
int
maxdp
:
int
thetaREF
:
float
thetaALT
:
float
cmPmb
:
float
if
(
$
args
[
"--threads"
]
!=
"nil"
):
threads
=
parse_int
(
$
args
[
"--threads"
]
)
else
:
threads
=
4
if
(
$
args
[
"--minDP"
]
!=
"nil"
):
mindp
=
parse_int
(
$
args
[
"--minDP"
]
)
else
:
mindp
=
1
if
(
$
args
[
"--maxDP"
]
!=
"nil"
):
maxdp
=
parse_int
(
$
args
[
"--maxDP"
]
)
else
:
maxdp
=
10
if
(
$
args
[
"--maxTotalDP"
]
!=
"nil"
):
maxtotal
=
parse_int
(
$
args
[
"--maxTotalDP"
]
)
else
:
maxtotal
=
30
if
(
$
args
[
"--minTotalDP"
]
!=
"nil"
):
mintotal
=
parse_int
(
$
args
[
"--minTotalDP"
]
)
else
:
mintotal
=
10
if
(
$
args
[
"<BAM>"
]
==
"nil"
):
quit
"input bam is required"
else
:
bamfile
=
$
args
[
"<BAM>"
]
if
(
$
args
[
"<barcodeFile>"
]
==
"nil"
):
quit
"input barcodeFile is required"
else
:
barcodeFile
=
$
args
[
"<barcodeFile>"
]
if
(
$
args
[
"<out_prefix>"
]
==
"nil"
):
quit
"output prefix is required"
else
:
out_dir
=
$
args
[
"<out_prefix>"
]
if
(
$
args
[
"<VCF>"
]
==
"nil"
):
quit
"input VCF file is required"
else
:
vcff
=
$
args
[
"<VCF>"
]
if
(
$
args
[
"--minMAPQ"
]
!=
"nil"
):
mapq
=
parse_int
(
$
args
[
"--minMAPQ"
]
)
else
:
mapq
=
20
if
(
$
args
[
"--baseq"
]
!=
"nil"
):
minbsq
=
parse_int
(
$
args
[
"--baseq"
]
)
else
:
minbsq
=
13
if
(
$
args
[
"--thetaREF"
]
!=
"nil"
):
thetaRef
=
parse_float
(
$
args
[
"--thetaREF"
]
)
else
:
thetaRef
=
0.1
if
(
$
args
[
"--thetaALT"
]
!=
"nil"
):
thetaAlt
=
parse_float
(
$
args
[
"--thetaALT"
]
)
else
:
thetaAlt
=
0.9
if
(
$
args
[
"--cmPmb"
]
!=
"nil"
):
cmPmb
=
parse_float
(
$
args
[
"--cmPmb"
]
)
else
:
cmPmb
=
0.1
if
(
$
args
[
"--chrom"
]
!=
"nil"
):
selectedChrs
=
$
args
[
"--chrom"
]
else
:
let
a
=
toSeq
(
1
..
19
)
if
(
$
args
[
"--chrName"
]
==
"nil"
):
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
""
&
$
x
)
let
all_Chrs
=
concat
(
b
,
@[
"X"
,
"Y"
]
)
selectedChrs
=
all_Chrs
.
join
(
","
)
else
:
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
"chr"
&
$
x
)
let
all_Chrs
=
concat
(
b
,
@[
"chrX"
,
"chrY"
]
)
selectedChrs
=
all_Chrs
.
join
(
","
)
var
args
=
commandLineParams
()
discard
sscocaller
(
args
)
let
s_Chrs
=
selectedChrs
.
split
(
','
)
#echo $s_Chrs
#var args = commandLineParams()
discard
sscocaller
(
threads
,
vcff
,
barcodeFile
,
bamfile
,
out_dir
,
mapq
,
minbsq
,
mintotal
,
maxtotal
,
mindp
,
maxdp
,
thetaREF
,
thetaALT
,
cmPmb
,
s_Chrs
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment