Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
sscocaller
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
BioCellGen-public
sscocaller
Commits
1c69f700
Commit
1c69f700
authored
3 years ago
by
Ruqian Lyu
Browse files
Options
Downloads
Patches
Plain Diff
breakdown to modules and refactor
parent
460f8534
No related branches found
No related tags found
No related merge requests found
Pipeline
#6980
failed
3 years ago
Stage: buildStatic
Stage: testStatic
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
src/private/findpath.nim
+109
-0
109 additions, 0 deletions
src/private/findpath.nim
src/private/graph.nim
+90
-0
90 additions, 0 deletions
src/private/graph.nim
src/private/utils.nim
+108
-0
108 additions, 0 deletions
src/private/utils.nim
src/sscocaller.nim
+55
-348
55 additions, 348 deletions
src/sscocaller.nim
with
362 additions
and
348 deletions
src/private/findpath.nim
0 → 100755
+
109
−
0
View file @
1c69f700
## implements the traceback function
import
utils
from
graph
import
SpermViNodes
import
streams
import
tables
import
math
proc pathTrackBack*(currentSperm: var SpermViNodes,
                    ithSperm: int,
                    thetaRef: float,
                    thetaAlt: float,
                    cmPmb: float,
                    outFileVStateMtx: var FileStream,
                    viSegmentInfo: var FileStream,
                    posEnd: var int64,
                    inferProb: var float,
                    reverseProb: var float): int =
  ## Walks `currentSperm.viNodeseq` from its last node back to its first,
  ## filling in each earlier node's `state` from the later node's `pathState`
  ## back-pointer, and writes two kinds of output while doing so:
  ##
  ## * one line per visited node to `outFileVStateMtx`:
  ##   `<SNP index> <ithSperm+1> <1|2>` (1 when the node is `stateRef`,
  ##   2 otherwise);
  ## * one line per run of consecutive same-state nodes to `viSegmentInfo`:
  ##   `ithSperm<ithSperm> <posStart> <posEnd> <inferProb-reverseProb> <cSNP> <1|2>`.
  ##
  ## `posEnd`, `inferProb` and `reverseProb` are in/out accumulators for the
  ## segment currently being extended. This proc never initialises them for
  ## the last node, and never sets or writes the last node's own state.
  ## NOTE(review): presumably the caller does both before calling - confirm
  ## at the call site.
  ##
  ## `thetaRef`/`thetaAlt` are forwarded to `getEmission`, `cmPmb` to
  ## `getTrans`. Always returns 0.
  var currentEm, prevEm: array[stateRef..stateAlt, float]
  var posStart: int64
  # true when the most recent step closed a segment (state change)
  var transitFlag = false
  # number of nodes in the segment currently being extended
  var cSNP = 1
  var ithSNP: int
  var transProb: float
  # [ln(P(transition)), ln(1 - P(transition))] for the gap at a segment border
  var rightGap, leftGap: array[0..1, float]
  for i in 1..high(currentSperm.viNodeseq):
    # `^i` is the later (already decided) node, `^(i+1)` the node decided now
    var state = currentSperm.viNodeseq[^i].state
    currentSperm.viNodeseq[^(i+1)].state = currentSperm.viNodeseq[^i].pathState[state]
    prevEm = getEmission(thetaRef=thetaRef, thetaAlt=thetaAlt,
                         cRef=currentSperm.viNodeseq[^(i+1)].cRef,
                         cAlt=currentSperm.viNodeseq[^(i+1)].cAlt)
    # key is the 1-based position of node `^(i+1)` within viNodeseq
    ithSNP = currentSperm.spermSnpIndexLookUp[high(currentSperm.viNodeseq)-i+1]
    transProb = getTrans(currentSperm.viNodeseq[^(i+1)].pos,
                         currentSperm.viNodeseq[^(i)].pos,
                         cmPmb=cmPmb)
    if currentSperm.viNodeseq[^(i+1)].state == stateRef:
      outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 1")
      if state == stateRef:
        # not transitioning to a different state ie still in this segment of same state
        inferProb += prevEm[stateRef]
        reverseProb += prevEm[stateAlt]
        cSNP += 1
        transitFlag = false
        #posStart = currentSperm[^(i+1)].pos
      else:
        # there is transition to different state: ref(start) to alt(end) now output the segment info
        posStart = currentSperm.viNodeseq[^(i)].pos
        #leftGapSize = currentSperm[^(i)].pos - currentSperm[^(i+1)].pos
        leftGap = [math.ln(transProb), math.ln(1-transProb)]
        # the closed segment is charged the gap on its left-hand border
        inferProb += leftGap[0]
        reverseProb += leftGap[1]
        # label 2: the segment being closed is the non-ref one
        viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2")
        transitFlag = true
        # start a new segment at node `^(i+1)`; the same gap is its right-hand border
        cSNP = 1
        rightGap = leftGap
        inferProb = prevEm[stateRef]+rightGap[0]
        reverseProb = prevEm[stateAlt]+rightGap[1]
        posEnd = currentSperm.viNodeseq[^(i+1)].pos
    else:
      outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 2")
      if state == stateAlt:
        # not transitioning to a different state ie still in this segment of same state
        inferProb += prevEm[stateAlt]
        reverseProb += prevEm[stateRef]
        cSNP += 1
        transitFlag = false
      else:
        ## state transit
        posStart = currentSperm.viNodeseq[^(i)].pos
        leftGap = [math.ln(transProb), math.ln(1-transProb)]
        inferProb += leftGap[0]
        reverseProb += leftGap[1]
        # label 1: the segment being closed is the ref one
        viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1")
        transitFlag = true
        cSNP = 1
        rightGap = leftGap
        inferProb = prevEm[stateAlt]+rightGap[0]
        reverseProb = prevEm[stateRef]+rightGap[1]
        posEnd = currentSperm.viNodeseq[^(i+1)].pos
  ## traced to the start position of the chromosome for this cell
  #leftGap = [0.0,0.0]
  if not transitFlag:
    ## the first SNP belongs to the segment that was being extended from the back
    posStart = currentSperm.viNodeseq[0].pos
    if currentSperm.viNodeseq[0].state == stateRef:
      viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1")
    else:
      viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2")
  else:
    ## the first node has different state from the second, the first node has its own segment
    posStart = currentSperm.viNodeseq[0].pos
    posEnd = posStart
    cSNP = 1
    currentEm = getEmission(thetaRef=thetaRef, thetaAlt=thetaAlt,
                            cRef=currentSperm.viNodeseq[0].cRef,
                            cAlt=currentSperm.viNodeseq[0].cAlt)
    transProb = getTrans(currentSperm.viNodeseq[0].pos,
                         currentSperm.viNodeseq[1].pos,
                         cmPmb=cmPmb)
    if currentSperm.viNodeseq[0].state == stateRef:
      inferProb = currentEm[stateRef]+math.ln(transProb)
      reverseProb = currentEm[stateAlt]+math.ln(1-transProb)
      viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1")
    else:
      inferProb = currentEm[stateAlt]+math.ln(transProb)
      reverseProb = currentEm[stateRef]+math.ln(1-transProb)
      viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2")
  return 0
This diff is collapsed.
Click to expand it.
src/private/graph.nim
0 → 100755
+
90
−
0
View file @
1c69f700
import
tables
import
utils
import
streams
import
hts
import
math
type
  # Nucleotide = enum
  #  A, C, T, G
  # Per-cell container: the chain of ViNode entries built for one cell
  # (one entry is appended by addViNode for every SNP the cell passes the
  # depth filter on), plus two index maps between the global SNP index and
  # the position inside viNodeseq.
  SpermViNodes* = object
    viNodeseq*: seq[ViNode]
    ## look up from SNPIndex to current Sperm SNP index
    # (the stored value is viNodeseq.len right after the node was appended,
    # i.e. a 1-based position within viNodeseq)
    snpIndexLookUp*: Table[int,int]
    ## look up from current Sperm SNP index to SNPIndex
    # (the key is the same 1-based position within viNodeseq)
    spermSnpIndexLookUp*: Table[int,int]
  # One SpermViNodes per cell; addViNode indexes it with the value stored in
  # the barcode table for the cell's barcode.
  SeqSpermViNodes* = seq[SpermViNodes]
proc addViNode*(barcodeTable: OrderedTableRef,
                alleleCountTable: Table[string,allele_expr],
                scSpermSeq: var SeqSpermViNodes,
                outFileTotalCountMtx: var FileStream,
                outFileAltCountMtx: var FileStream,
                nnsize: var int,
                mindp: int,
                maxdp: int,
                snpIndex: int,
                thetaRef: float,
                thetaAlt: float,
                rec: Variant,
                initProb: array,
                cmPmb: float): int =
  ## Forward (Viterbi) step for one SNP: for every cell barcode in
  ## `alleleCountTable` whose read depth lies strictly between `mindp` and
  ## `maxdp`, write the cell's total and alt counts to the two count streams,
  ## bump `nnsize`, and append a new `ViNode` to that cell's chain in
  ## `scSpermSeq`, registering it in both index look-up tables.
  ##
  ## The first node of a chain is scored from `initProb` and gets `stateN`
  ## back-pointers; later nodes keep, per state, the best-scoring predecessor
  ## using the transition probability from `getTrans`. Always returns 0.
  let snpPos = int(rec.POS)
  for bc, ac in alleleCountTable.pairs:
    ## mindp, maxdp are per-cell thresholds; both bounds are exclusive
    let cellDepth = ac.cref + ac.calt
    if cellDepth <= mindp or cellDepth >= maxdp:
      continue
    let ithSperm = barcodeTable[bc]

    ## "<row> <col> <value>" entries of the total-count and alt-count matrices
    let mtxPrefix = $snpIndex & " " & $(ithSperm+1) & " "
    outFileTotalCountMtx.writeLine(mtxPrefix & $cellDepth)
    outFileAltCountMtx.writeLine(mtxPrefix & $ac.calt)
    nnsize += 1

    let emission = getEmission(thetaRef=thetaRef, thetaAlt=thetaAlt,
                               cRef=ac.cref, cAlt=ac.calt)

    # fields that are filled in the same way for first and later nodes
    var node = ViNode()
    node.pos = snpPos
    node.cRef = int(ac.cref)
    node.cAlt = int(ac.calt)

    if scSpermSeq[ithSperm].viNodeseq.len == 0:
      # first observation for this cell: prior plus emission, no predecessor
      node.pathScore[stateRef] = math.ln(initProb[stateRef]) + emission[stateRef]
      node.pathScore[stateAlt] = math.ln(initProb[stateAlt]) + emission[stateAlt]
      node.pathState[stateRef] = stateN
      node.pathState[stateAlt] = stateN
      node.state = stateN
    else:
      let prev = scSpermSeq[ithSperm].viNodeseq[^1]
      let pTrans = getTrans(prev.pos, snpPos, cmPmb=cmPmb)
      let lnTrans = math.ln(pTrans)
      let lnStay = math.ln(1 - pTrans)

      # best predecessor when the current node is in stateRef
      let refFromRef = prev.pathScore[stateRef] + lnStay
      let refFromAlt = prev.pathScore[stateAlt] + lnTrans
      if refFromRef > refFromAlt:
        node.pathScore[stateRef] = refFromRef
        node.pathState[stateRef] = stateRef
      else:
        node.pathScore[stateRef] = refFromAlt
        node.pathState[stateRef] = stateAlt

      # best predecessor when the current node is in stateAlt
      let altFromRef = prev.pathScore[stateRef] + lnTrans
      let altFromAlt = prev.pathScore[stateAlt] + lnStay
      if altFromRef > altFromAlt:
        node.pathScore[stateAlt] = altFromRef
        node.pathState[stateAlt] = stateRef
      else:
        node.pathScore[stateAlt] = altFromAlt
        node.pathState[stateAlt] = stateAlt

      node.pathScore[stateAlt] += emission[stateAlt]
      node.pathScore[stateRef] += emission[stateRef]

    # append and register under the 1-based position within the chain
    scSpermSeq[ithSperm].viNodeseq.add(node)
    let nodeCount = scSpermSeq[ithSperm].viNodeseq.len
    scSpermSeq[ithSperm].snpIndexLookUp[snpIndex] = nodeCount
    scSpermSeq[ithSperm].spermSnpIndexLookUp[nodeCount] = snpIndex
  result = 0
# The size line
\ No newline at end of file
This diff is collapsed.
Click to expand it.
src/private/utils.nim
0 → 100755
+
108
−
0
View file @
1c69f700
from
distributions
/
rmath
import
dbinom
import
math
import
tables
import
hts
type
  # Reference/alternative allele read counts for one cell at one SNP.
  # A ref object, so the inc_count_* procs mutate the shared instance.
  allele_expr* = ref object
    cref*: int
    calt*: int
  # Hidden states of the model. Only stateRef..stateAlt index the per-state
  # arrays below; stateN is used as a "not set / no predecessor" marker.
  ViState* = enum
    stateRef, stateAlt, stateN
  # One observation (SNP) in a cell's Viterbi chain.
  ViNode* = object
    # SNP position as reported by the VCF record (int(rec.POS))
    pos*: int
    # ref / alt read counts of the cell at this SNP
    cRef*: int
    cAlt*: int
    # best log-score of any path ending at this node in the given state
    pathScore*: array[stateRef..stateAlt, float]
    # back-pointer: previous node's state on that best path
    pathState*: array[stateRef..stateAlt, ViState]
    # state assigned to this node once the traceback has run
    state*: ViState
# proc cref*(a:allele_expr):int = a.cref
# proc calt*(a:allele_expr):int = a.calt
proc inc_count_cref*(a:allele_expr) =
  ## Adds one to the reference-allele read count of `a` (mutates in place).
  a.cref += 1
proc inc_count_calt*(a:allele_expr) =
  ## Adds one to the alternative-allele read count of `a` (mutates in place).
  a.calt += 1
# proc pathScore*(v:ViNode):array[stateRef..stateAlt, float] = v.pathScore
proc getEmission*(thetaRef=0.1, thetaAlt=0.9, cRef:int, cAlt:int): array[stateRef..stateAlt, float] =
  ## Log emission scores of observing `cAlt` alt reads out of `cRef + cAlt`
  ## reads, one score per hidden state: binomial log-density with success
  ## probability `thetaRef` for `stateRef` and `thetaAlt` for `stateAlt`.
  let depth = (cRef+cAlt)
  let altReads = float(cAlt)
  result[stateRef] = dbinom(x=altReads, size=depth, prob=thetaRef, log=true)
  result[stateAlt] = dbinom(x=altReads, size=depth, prob=thetaAlt, log=true)
proc getTrans*(pos1:int64, pos2:int64, cmPmb=0.1): float =
  ## Transition probability between two SNPs located at `pos1` and `pos2`
  ## (`pos1` must not be larger than `pos2`), computed as
  ## `1 - exp(-distance * 1e-8 * cmPmb)` where `cmPmb` is the rate in
  ## centiMorgan per megabase. Terminates the program with
  ## "Wrong order of snps" when the positions are out of order.
  if pos1 > pos2:
    quit "Wrong order of snps"
  let distanceBp = float(pos2 - pos1)
  # 1 cM per Mb corresponds to 1e-8 expected crossovers per base pair
  result = 1 - math.exp(-distanceBp * 1e-8 * cmPmb)
#
proc countAllele*(rec:Variant, ibam:Bam, maxTotalReads:int, minTotalReads:int,
                  chrom:string, mapq:int, barcodeTable:OrderedTableRef, minbsq:int,
                  bulkBam:bool, barcodeTag:string): Table[string,allele_expr] =
  ## Counts, per cell barcode, the reads supporting the REF and the (first)
  ## ALT allele of `rec` among the alignments of `ibam` overlapping the SNP.
  ##
  ## A read is used only when it
  ## * carries the `barcodeTag` tag (or `bulkBam` is set, in which case
  ##   untagged reads are assigned the barcode "bulk"),
  ## * is mapped, not a duplicate and has mapping quality >= `mapq`,
  ## * has a barcode present in `barcodeTable`,
  ## * has an aligned base at the SNP with base quality >= `minbsq`.
  ##
  ## Returns the barcode -> counts table, or an empty table when the number
  ## of used reads is above `maxTotalReads` or below `minTotalReads`.
  ## Reads whose base matches neither allele still count towards the total
  ## and still create a (possibly 0/0) entry for their barcode.
  var alleleCountTable = initTable[string,allele_expr]()
  var total_reads = 0
  let rec_alt = rec.ALT[0][0]
  let rec_ref = rec.REF[0]
  # 1-based SNP position; alignment offsets below are 0-based
  let position = rec.POS.cint
  for aln in ibam.query(chrom = chrom, start = rec.POS.cint-1, stop = rec.POS.cint):
    var cbt = tag[string](aln, barcodeTag)
    var currentCB: string
    if cbt.isNone:
      if not bulkBam:
        continue
      currentCB = "bulk"
    else:
      currentCB = cbt.get
    ## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
    ## if not aln.flag.proper_pair: continue
    if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue
    if not barcodeTable.hasKey(currentCB):
      continue

    # Walk the CIGAR until the event that covers the SNP.
    # `off`  : 0-based exclusive reference end of the events consumed so far
    # `qoff` : number of query bases consumed so far
    var
      off = aln.start.int
      qoff = 0
      over = 0
      hasBase = false
    for event in aln.cigar:
      let cons = event.consumes
      if cons.query:
        qoff += event.len ## the offs to the query sequences
      if cons.reference:
        off += event.len ## the offs to the reference sequences
      # The 0-based SNP coordinate is position-1, so an event covers it as
      # soon as off >= position. (Comparing with `<=` skipped the event
      # whose last reference base is the SNP and picked a wrong base
      # whenever further CIGAR events followed.)
      if off < position:
        continue
      over = off - position
      # A deletion / reference skip spanning the SNP provides no read base.
      hasBase = cons.query and cons.reference
      break
    if not hasBase:
      continue

    let qidx = qoff - over-1
    if aln.base_quality_at(qidx).cint < minbsq:
      continue
    total_reads += 1

    let base = aln.base_at(qidx)
    if not alleleCountTable.hasKey(currentCB):
      alleleCountTable[currentCB] = allele_expr(cref:0, calt:0)
    if base == rec_ref:
      inc_count_cref(alleleCountTable[currentCB])
    elif base == rec_alt:
      inc_count_calt(alleleCountTable[currentCB])

  if total_reads > maxTotalReads or total_reads < minTotalReads:
    return initTable[string,allele_expr]()
  return alleleCountTable
\ No newline at end of file
This diff is collapsed.
Click to expand it.
src/sscocaller.nim
+
55
−
348
View file @
1c69f700
...
@@ -12,123 +12,17 @@ import strutils
...
@@ -12,123 +12,17 @@ import strutils
import
hts
import
hts
import
tables
import
tables
import
sequtils
import
sequtils
import
private
/
utils
import
math
import
math
from
distributions
/
rmath
import
dbinom
import
streams
import
streams
import
private
/
graph
import
private
/
findpath
type
allele_expr
=
ref
object
cref
:
int
calt
:
int
type
# Nucleotide = enum
# A, C, T, G
ViState
=
enum
stateRef
,
stateAlt
,
stateN
ViNode
=
object
pos
:
int
cRef
:
int
cAlt
:
int
pathScore
:
array
[
stateRef
..
stateAlt
,
float
]
pathState
:
array
[
stateRef
..
stateAlt
,
ViState
]
state
:
ViState
SpermViNodes
=
object
viNodeseq
:
seq
[
ViNode
]
## look up from SNPIndex to current Sperm SNP index
snpIndexLookUp
:
Table
[
int
,
int
]
## look up from current Sperm SNP index to SNPIndex
spermSnpIndexLookUp
:
Table
[
int
,
int
]
SeqSpermViNodes
=
seq
[
SpermViNodes
]
proc
inc_count_cref
(
a
:
allele_expr
)
=
inc
(
a
.
cref
)
proc
inc_count_calt
(
a
:
allele_expr
)
=
inc
(
a
.
calt
)
proc
getEmission
(
thetaRef
=
0.1
,
thetaAlt
=
0.9
,
cRef
:
int
,
cAlt
:
int
):
array
[
stateRef
..
stateAlt
,
float
]
=
var
emissionScore
=
[
dbinom
(
x
=
float
(
cAlt
),
size
=
(
cRef
+
cAlt
),
prob
=
thetaRef
,
log
=
true
),
dbinom
(
x
=
float
(
cAlt
),
size
=
(
cRef
+
cAlt
),
prob
=
thetaAlt
,
log
=
true
)
]
return
emissionScore
proc
getTrans
(
pos1
:
int64
,
pos2
:
int64
,
cmPmb
=
0.1
):
float
=
if
pos2
<
pos1
:
quit
"Wrong order of snps"
var
rec
=
1
-
math
.
exp
(
-
float
(
pos2
-
pos1
)
*
1e-8
*
cmPmb
)
# for autosomes 1*cmPbm cM per Mb
return
rec
#
proc
countAllele
(
rec
:
Variant
,
ibam
:
Bam
,
maxTotalReads
:
int
,
minTotalReads
:
int
,
chrom
:
string
,
mapq
:
int
,
barcodeTable
:
OrderedTableRef
,
minbsq
:
int
,
bulkBam
:
bool
,
barcodeTag
:
string
):
Table
[
string
,
allele_expr
]
=
var
alleleCountTable
=
initTable
[
string
,
allele_expr
]
()
var
rec_alt
:
char
var
total_reads
=
0
rec_alt
=
rec
.
ALT
[
0
][
0
]
for
aln
in
ibam
.
query
(
chrom
=
chrom
,
start
=
rec
.
POS
.
cint
-
1
,
stop
=
rec
.
POS
.
cint
):
var
cbt
=
tag
[
string
]
(
aln
,
barcodeTag
)
var
currentCB
:
string
if
cbt
.
isNone
:
# echo "no cb "
if
not
bulkBam
:
continue
else
:
currentCB
=
"bulk"
else
:
currentCB
=
cbt
.
get
if
aln
.
flag
.
unmapped
or
aln
.
mapping_quality
.
cint
<
mapq
or
aln
.
flag
.
dup
:
continue
## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
## if not aln.flag.proper_pair: continue
if
not
barcodeTable
.
hasKey
(
currentCB
):
continue
var
off
=
aln
.
start
.
int
qoff
=
0
roff_only
=
0
base
=
'n'
position
=
rec
.
POS
.
cint
over
=
0
for
event
in
aln
.
cigar
:
var
cons
=
event
.
consumes
#echo $cons
if
cons
.
query
:
qoff
+=
event
.
len
## the offs to the query sequences
if
cons
.
reference
:
off
+=
event
.
len
## the offs to the reference sequences
if
not
cons
.
query
:
roff_only
+=
event
.
len
if
off
<=
position
:
continue
over
=
off
-
position
# get the base
base
=
aln
.
base_at
(
qoff
-
over
-
1
)
break
if
aln
.
base_quality_at
(
qoff
-
over
-
1
).
cint
<
minbsq
:
continue
total_reads
+=
1
if
alleleCountTable
.
hasKey
(
currentCB
):
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec
.
REF
[
0
]
:
alleleCountTable
[
currentCB
]
.
inc_count_cref
continue
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec_alt
:
alleleCountTable
[
currentCB
]
.
inc_count_calt
continue
else
:
var
new_snp
=
allele_expr
(
cref
:
0
,
calt
:
0
)
alleleCountTable
[
currentCB
]
=
new_snp
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec
.
REF
[
0
]
:
alleleCountTable
[
currentCB
]
.
inc_count_cref
continue
if
aln
.
base_at
(
qoff
-
over
-
1
)
==
rec_alt
:
alleleCountTable
[
currentCB
]
.
inc_count_calt
continue
if
total_reads
>
maxTotalReads
or
total_reads
<
minTotalReads
:
return
initTable
[
string
,
allele_expr
]
()
return
alleleCountTable
proc
sscocaller
(
threads
:
int
,
vcff
:
string
,
barcodeFile
:
string
,
proc
sscocaller
(
threads
:
int
,
vcff
:
string
,
barcodeFile
:
string
,
bamfile
:
string
,
out_dir
:
string
,
mapq
:
int
,
bamfile
:
string
,
out_dir
:
string
,
mapq
:
int
,
minbsq
:
int
,
mintotal
:
int
,
maxtotal
:
int
,
mindp
:
int
,
maxdp
:
int
,
minbsq
:
int
,
mintotal
:
int
,
maxtotal
:
int
,
mindp
:
int
,
maxdp
:
int
,
thetaREF
:
float
,
thetaALT
:
float
,
cmPmb
:
float
,
s_Chrs
:
seq
,
barcodeTag
:
string
):
int
=
thetaREF
:
float
,
thetaALT
:
float
,
cmPmb
:
float
,
s_Chrs
:
seq
,
barcodeTag
:
string
):
int
=
var
var
ibam
:
Bam
ibam
:
Bam
ivcf
:
VCF
ivcf
:
VCF
...
@@ -167,12 +61,8 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
...
@@ -167,12 +61,8 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
var
scSpermSeq
:
SeqSpermViNodes
var
scSpermSeq
:
SeqSpermViNodes
## matches with the order in barcodeTable
## matches with the order in barcodeTable
scSpermSeq
.
setLen
(
barcodeTable
.
len
)
scSpermSeq
.
setLen
(
barcodeTable
.
len
)
var
outFileSNPanno
:
FileStream
var
outFileSNPanno
,
outFileTotalCountMtx
,
outFileAltCountMtx
,
outFileVStateMtx
,
viSegmentInfo
:
FileStream
var
outFileTotalCountMtx
:
FileStream
var
outFileAltCountMtx
:
FileStream
# var outHeaderMtx:FileStream
var
outFileVStateMtx
:
FileStream
var
viSegmentInfo
:
FileStream
try
:
try
:
outFileSNPanno
=
openFileStream
(
out_dir
&
chrom
&
"_snpAnnot.txt"
,
fmWrite
)
outFileSNPanno
=
openFileStream
(
out_dir
&
chrom
&
"_snpAnnot.txt"
,
fmWrite
)
outFileTotalCountMtx
=
openFileStream
(
out_dir
&
chrom
&
"_totalCount.mtx"
,
fmWrite
)
outFileTotalCountMtx
=
openFileStream
(
out_dir
&
chrom
&
"_totalCount.mtx"
,
fmWrite
)
...
@@ -190,20 +80,14 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
...
@@ -190,20 +80,14 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
outFileAltCountMtx
.
writeLine
(
' '
.
repeat
(
50
))
outFileAltCountMtx
.
writeLine
(
' '
.
repeat
(
50
))
outFileVStateMtx
.
writeLine
(
"%%MatrixMarket matrix coordinate integer general"
)
outFileVStateMtx
.
writeLine
(
"%%MatrixMarket matrix coordinate integer general"
)
outFileVStateMtx
.
writeLine
(
' '
.
repeat
(
50
))
outFileVStateMtx
.
writeLine
(
' '
.
repeat
(
50
))
outFileSNPanno
.
writeLine
(
"POS"
&
"
\t
"
&
"REF"
&
"
\t
"
&
"ALT"
)
outFileSNPanno
.
writeLine
(
"POS"
&
"
\t
"
&
"REF"
&
"
\t
"
&
"ALT"
)
for
rec
in
ivcf
.
query
(
chrom
):
for
rec
in
ivcf
.
query
(
chrom
):
if
rec
.
ALT
.
len
>
1
:
continue
if
rec
.
ALT
.
len
>
1
:
continue
if
rec
.
ALT
.
len
==
0
:
continue
if
rec
.
ALT
.
len
==
0
:
continue
## alleleCountTable contains for this SNP.POS, each cell barcode's allele counts
## alleleCountTable contains for this SNP.POS, each cell barcode's allele counts
var
alleleCountTable
=
countAllele
(
rec
=
rec
,
ibam
=
ibam
,
var
alleleCountTable
=
countAllele
(
rec
=
rec
,
ibam
=
ibam
,
chrom
=
chrom
,
mapq
=
mapq
,
barcodeTable
=
barcodeTable
,
minbsq
=
minbsq
,
chrom
=
chrom
,
mapq
=
mapq
,
maxTotalReads
=
maxtotal
,
minTotalReads
=
mintotal
,
bulkBam
=
bulkBam
,
barcodeTag
=
barcodeTag
)
barcodeTable
=
barcodeTable
,
minbsq
=
minbsq
,
maxTotalReads
=
maxtotal
,
minTotalReads
=
mintotal
,
bulkBam
=
bulkBam
,
barcodeTag
=
barcodeTag
)
if
alleleCountTable
.
len
==
0
:
continue
if
alleleCountTable
.
len
==
0
:
continue
var
rec_alt
:
char
var
rec_alt
:
char
...
@@ -212,82 +96,24 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
...
@@ -212,82 +96,24 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
## add to snpAnnoSeq, later write to SNPannot file, which contains SNP.pos, SNP.ref,SNP.alt; The rowAnnotations
## add to snpAnnoSeq, later write to SNPannot file, which contains SNP.pos, SNP.ref,SNP.alt; The rowAnnotations
snpIndex
+=
1
snpIndex
+=
1
outFileSNPanno
.
writeLine
(
$
rec
.
POS
&
"
\t
"
&
$
rec
.
REF
[
0
]
&
"
\t
"
&
$
rec_alt
)
outFileSNPanno
.
writeLine
(
$
rec
.
POS
&
"
\t
"
&
$
rec
.
REF
[
0
]
&
"
\t
"
&
$
rec_alt
)
for
bc
,
ac
in
alleleCountTable
.
mpairs
:
discard
addViNode
(
barcodeTable
=
barcodeTable
,
alleleCountTable
=
alleleCountTable
,
scSpermSeq
=
scSpermSeq
,
# ac.tostring(acs)
outFileTotalCountMtx
=
outFileTotalCountMtx
,
outFileAltCountMtx
=
outFileAltCountMtx
,
nnsize
=
nnsize
,
## mindp, maxdp, they are values per cell
mindp
=
mindp
,
maxdp
=
maxdp
,
thetaRef
=
thetaRef
,
thetaAlt
=
thetaAlt
,
snpIndex
=
snpIndex
,
rec
=
rec
,
if
(
ac
.
cref
+
ac
.
calt
)
<=
mindp
or
(
ac
.
cref
+
ac
.
calt
)
>=
maxdp
:
continue
initProb
=
initProb
,
cmPmb
=
cmPmb
)
var
ithSperm
=
barcodeTable
[
bc
]
var
posEnd
:
int64
## write to mtx Ref count
var
inferProb
,
reverseProb
=
0.0
outFileTotalCountMtx
.
writeLine
(
$
snpIndex
&
" "
&
$
(
ithSperm
+
1
)
&
" "
&
$
(
ac
.
cRef
+
ac
.
cAlt
))
var
currentEm
:
array
[
stateRef
..
stateAlt
,
float
]
outFileAltCountMtx
.
writeLine
(
$
snpIndex
&
" "
&
$
(
ithSperm
+
1
)
&
" "
&
$
ac
.
cAlt
)
nnsize
+=
1
var
emissionArray
=
getEmission
(
thetaRef
=
thetaRef
,
thetaAlt
=
thetaAlt
,
cRef
=
ac
.
cRef
,
cAlt
=
ac
.
cAlt
)
if
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
==
0
:
var
currentViNode
=
ViNode
()
currentViNode
.
pathScore
[
stateRef
]
=
math
.
ln
(
initProb
[
stateRef
]
)
+
emissionArray
[
stateRef
]
currentViNode
.
pathScore
[
stateAlt
]
=
math
.
ln
(
initProb
[
stateAlt
]
)
+
emissionArray
[
stateAlt
]
currentViNode
.
pathState
[
stateRef
]
=
stateN
currentViNode
.
pathState
[
stateAlt
]
=
stateN
currentViNode
.
state
=
stateN
currentViNode
.
pos
=
int
(
rec
.
POS
)
currentViNode
.
cAlt
=
int
(
ac
.
cAlt
)
currentViNode
.
cRef
=
int
(
ac
.
cRef
)
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
add
(
currentViNode
)
scSpermSeq
[
ithSperm
]
.
snpIndexLookUp
[
snpIndex
]
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
scSpermSeq
[
ithSperm
]
.
spermSnpIndexLookUp
[
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
]
=
snpIndex
else
:
let
preVNode
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
)
]
var
ltransProb
=
math
.
ln
(
getTrans
(
preVNode
.
pos
,
int
(
rec
.
POS
),
cmPmb
=
cmPmb
))
var
lnoTransProb
=
math
.
ln
(
1
-
getTrans
(
preVNode
.
pos
,
int
(
rec
.
POS
),
cmPmb
=
cmPmb
))
var
currentViNode
=
ViNode
()
# ref/alt -> ref
var
refTref
=
preVNode
.
pathScore
[
stateRef
]
+
lnoTransProb
var
altTref
=
preVNode
.
pathScore
[
stateAlt
]
+
ltransProb
if
refTref
>
altTref
:
currentViNode
.
pathScore
[
stateRef
]
=
refTref
currentViNode
.
pathState
[
stateRef
]
=
stateRef
else
:
currentViNode
.
pathScore
[
stateRef
]
=
altTref
currentViNode
.
pathState
[
stateRef
]
=
stateAlt
# ref/alt -> alt
var
refTalt
=
preVNode
.
pathScore
[
stateRef
]
+
ltransProb
var
altTalt
=
preVNode
.
pathScore
[
stateAlt
]
+
lnoTransProb
if
refTalt
>
altTalt
:
currentViNode
.
pathScore
[
stateAlt
]
=
refTalt
currentViNode
.
pathState
[
stateAlt
]
=
stateRef
else
:
currentViNode
.
pathScore
[
stateAlt
]
=
altTalt
currentViNode
.
pathState
[
stateAlt
]
=
stateAlt
currentViNode
.
pathScore
[
stateAlt
]
+=
emissionArray
[
stateAlt
]
currentViNode
.
pathScore
[
stateRef
]
+=
emissionArray
[
stateRef
]
currentViNode
.
cAlt
=
int
(
ac
.
cAlt
)
currentViNode
.
cRef
=
int
(
ac
.
cRef
)
currentViNode
.
pos
=
int
(
rec
.
POS
)
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
add
(
currentViNode
)
scSpermSeq
[
ithSperm
]
.
snpIndexLookUp
[
snpIndex
]
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
scSpermSeq
[
ithSperm
]
.
spermSnpIndexLookUp
[
scSpermSeq
[
ithSperm
]
.
viNodeseq
.
len
]
=
snpIndex
# The size line
var
transitFlag
=
false
var
cSNP
=
0
var
posEnd
,
posStart
:
int64
var
inferProb
,
reverseProb
,
transProb
=
0.0
var
currentEm
,
prevEm
:
array
[
stateRef
..
stateAlt
,
float
]
var
lastNode
:
ViNode
var
lastNode
:
ViNode
var
spermVNseq
:
SpermViNodes
var
spermVNseq
:
SpermViNodes
var
ithSNP
:
int
var
ithSNP
:
int
## trans/no trans
## trans/no trans
var
rightGap
,
leftGap
:
array
[
0
..
1
,
float
]
for
ithSperm
in
0
..(
scSpermSeq
.
len
-
1
):
for
ithSperm
in
0
..(
scSpermSeq
.
len
-
1
):
## rightGap,leftGap = [0.0,0.0]
## rightGap,leftGap = [0.0,0.0]
spermVNseq
=
scSpermSeq
[
ithSperm
]
spermVNseq
=
scSpermSeq
[
ithSperm
]
if
spermVNseq
.
viNodeseq
.
len
==
0
:
continue
if
spermVNseq
.
viNodeseq
.
len
==
0
:
continue
lastNode
=
spermVNseq
.
viNodeseq
[
high
(
spermVNseq
.
viNodeseq
)
]
lastNode
=
spermVNseq
.
viNodeseq
[
high
(
spermVNseq
.
viNodeseq
)
]
currentEm
=
getEmission
(
thetaRef
=
thetaRef
,
thetaAlt
=
thetaAlt
,
currentEm
=
getEmission
(
thetaRef
=
thetaRef
,
thetaAlt
=
thetaAlt
,
cRef
=
lastNode
.
cRef
,
cAlt
=
lastNode
.
cAlt
)
cRef
=
lastNode
.
cRef
,
cAlt
=
lastNode
.
cAlt
)
if
lastNode
.
pathScore
[
stateRef
]
>
lastNode
.
pathScore
[
stateAlt
]
:
if
lastNode
.
pathScore
[
stateRef
]
>
lastNode
.
pathScore
[
stateAlt
]
:
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
)
]
.
state
=
stateRef
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
)
]
.
state
=
stateRef
ithSNP
=
scSpermSeq
[
ithSperm
]
.
spermSnpIndexLookUp
[
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
)
+
1
]
ithSNP
=
scSpermSeq
[
ithSperm
]
.
spermSnpIndexLookUp
[
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
)
+
1
]
...
@@ -301,90 +127,13 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
...
@@ -301,90 +127,13 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
inferProb
=
currentEm
[
stateAlt
]
inferProb
=
currentEm
[
stateAlt
]
reverseProb
=
currentEm
[
stateRef
]
reverseProb
=
currentEm
[
stateRef
]
posEnd
=
lastNode
.
pos
posEnd
=
lastNode
.
pos
cSNP
=
1
## call pathTrackBack will write the most probably hidden state seq and viterbi segment info to the
## traceback for yielding the most probable state sequence
## relevant File Streams.
for
i
in
1
..
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
):
var
state
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^
i
]
.
state
discard
pathTrackBack
(
currentSperm
=
scSpermSeq
[
ithSperm
]
,
ithSperm
=
ithSperm
,
thetaRef
=
thetaRef
,
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
state
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^
i
]
.
pathState
[
state
]
thetaAlt
=
thetaAlt
,
cmPmb
=
cmPmb
,
outFileVStateMtx
=
outFileVStateMtx
,
prevEm
=
getEmission
(
thetaRef
=
thetaRef
,
thetaAlt
=
thetaAlt
,
viSegmentInfo
=
viSegmentInfo
,
posEnd
=
posEnd
,
inferProb
=
inferProb
,
reverseProb
=
reverseProb
)
cRef
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
cRef
,
cAlt
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
cAlt
)
ithSNP
=
scSpermSeq
[
ithSperm
]
.
spermSnpIndexLookUp
[
high
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
)
-
i
+
1
]
transProb
=
getTrans
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
pos
,
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
)
]
.
pos
,
cmPmb
=
cmPmb
)
if
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
state
==
stateRef
:
outFileVStateMtx
.
writeLine
(
$
ithSNP
&
" "
&
$
(
ithSperm
+
1
)
&
" 1"
)
if
state
==
stateRef
:
# not transitioning to a different state ie still in this segment of same state
inferProb
+=
prevEm
[
stateRef
]
reverseProb
+=
prevEm
[
stateAlt
]
cSNP
+=
1
transitFlag
=
false
#posStart = scSpermSeq[ithSperm].viNodeseq[^(i+1)].pos
else
:
# there is transition to different state: ref(start) to alt(end) now output the segment info
posStart
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
)
]
.
pos
#leftGapSize = scSpermSeq[ithSperm].viNodeseq[^(i)].pos - scSpermSeq[ithSperm].viNodeseq[^(i+1)].pos
leftGap
=[
math
.
ln
(
transProb
),
math
.
ln
(
1
-
transProb
)
]
inferProb
+=
leftGap
[
0
]
reverseProb
+=
leftGap
[
1
]
viSegmentInfo
.
writeLine
(
"ithSperm"
&
$
ithSperm
&
" "
&
$
posStart
&
" "
&
$
posEnd
&
" "
&
$
(
inferProb
-
reverseProb
)
&
" "
&
$
cSNP
&
" 2"
)
transitFlag
=
true
cSNP
=
1
rightGap
=
leftGap
inferProb
=
prevEm
[
stateRef
]+
rightGap
[
0
]
reverseProb
=
prevEm
[
stateAlt
]+
rightGap
[
1
]
posEnd
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
pos
else
:
outFileVStateMtx
.
writeLine
(
$
ithSNP
&
" "
&
$
(
ithSperm
+
1
)
&
" 2"
)
if
state
==
stateAlt
:
# not transitioning to a different state ie still in this segment of same state
inferProb
+=
prevEm
[
stateAlt
]
reverseProb
+=
prevEm
[
stateRef
]
cSNP
+=
1
transitFlag
=
false
else
:
## state transit
posStart
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
)
]
.
pos
leftGap
=[
math
.
ln
(
transProb
),
math
.
ln
(
1
-
transProb
)
]
inferProb
+=
leftGap
[
0
]
reverseProb
+=
leftGap
[
1
]
viSegmentInfo
.
writeLine
(
"ithSperm"
&
$
ithSperm
&
" "
&
$
posStart
&
" "
&
$
posEnd
&
" "
&
$
(
inferProb
-
reverseProb
)
&
" "
&
$
cSNP
&
" 1"
)
transitFlag
=
true
cSNP
=
1
rightGap
=
leftGap
inferProb
=
prevEm
[
stateAlt
]+
rightGap
[
0
]
reverseProb
=
prevEm
[
stateRef
]+
rightGap
[
1
]
posEnd
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
^(
i
+
1
)
]
.
pos
## traced to the start position of the chromosome for this cell
#leftGap = [0.0,0.0]
if
not
transitFlag
:
## the first SNP is included in the segment from traced from back
posStart
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
0
]
.
pos
if
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
0
]
.
state
==
stateRef
:
viSegmentInfo
.
writeLine
(
"ithSperm"
&
$
ithSperm
&
" "
&
$
posStart
&
" "
&
$
posEnd
&
" "
&
$
(
inferProb
-
reverseProb
)
&
" "
&
$
cSNP
&
" 1"
)
else
:
viSegmentInfo
.
writeLine
(
"ithSperm"
&
$
ithSperm
&
" "
&
$
posStart
&
" "
&
$
posEnd
&
" "
&
$
(
inferProb
-
reverseProb
)
&
" "
&
$
cSNP
&
" 2"
)
else
:
## the first node has different state from the second, the first node has its own segment
posStart
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
0
]
.
pos
posEnd
=
posStart
cSNP
=
1
currentEm
=
getEmission
(
thetaRef
=
thetaRef
,
thetaAlt
=
thetaAlt
,
cRef
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
0
]
.
cRef
,
cAlt
=
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
0
]
.
cAlt
)
transProb
=
getTrans
(
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
0
]
.
pos
,
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
1
]
.
pos
,
cmPmb
=
cmPmb
)
if
scSpermSeq
[
ithSperm
]
.
viNodeseq
[
0
]
.
state
==
stateRef
:
inferProb
=
currentEm
[
stateRef
]+
math
.
ln
(
transProb
)
reverseProb
=
currentEm
[
stateAlt
]+
math
.
ln
(
1
-
transProb
)
viSegmentInfo
.
writeLine
(
"ithSperm"
&
$
ithSperm
&
" "
&
$
posStart
&
" "
&
$
posEnd
&
" "
&
$
(
inferProb
-
reverseProb
)
&
" "
&
$
cSNP
&
" 1"
)
else
:
inferProb
=
currentEm
[
stateAlt
]+
math
.
ln
(
transProb
)
reverseProb
=
currentEm
[
stateRef
]+
math
.
ln
(
1
-
transProb
)
viSegmentInfo
.
writeLine
(
"ithSperm"
&
$
ithSperm
&
" "
&
$
posStart
&
" "
&
$
posEnd
&
" "
&
$
(
inferProb
-
reverseProb
)
&
" "
&
$
cSNP
&
" 2"
)
transitFlag
=
false
outFileTotalCountMtx
.
setPosition
(
49
)
outFileTotalCountMtx
.
setPosition
(
49
)
outFileVStateMtx
.
setPosition
(
49
)
outFileVStateMtx
.
setPosition
(
49
)
outFileAltCountMtx
.
setPosition
(
49
)
outFileAltCountMtx
.
setPosition
(
49
)
...
@@ -401,11 +150,10 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
...
@@ -401,11 +150,10 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
viSegmentInfo
.
close
()
viSegmentInfo
.
close
()
ibam
.
close
()
ibam
.
close
()
ivcf
.
close
()
ivcf
.
close
()
return
0
return
0
when
(
isMainModule
):
when
(
isMainModule
):
let
version
=
"0.2.
1
"
let
version
=
"0.2.
2
"
var
doc
=
format
(
"""
var
doc
=
format
(
"""
$version
$version
...
@@ -424,19 +172,19 @@ Arguments:
...
@@ -424,19 +172,19 @@ Arguments:
Options:
Options:
-t --threads <threads> number of BAM decompression threads [default: 4]
-t --threads <threads>
number of BAM decompression threads [default: 4]
-cb --cellbarcode <cellbarcode> the cell barcode tag, by default it is CB
-cb --cellbarcode <cellbarcode>
the cell barcode tag, by default it is CB
-MQ --minMAPQ <mapq> Minimum MAPQ for read filtering [default: 20]
-MQ --minMAPQ <mapq>
Minimum MAPQ for read filtering [default: 20]
-BQ --baseq <baseq> base quality threshold for a base to be used for counting [default: 13]
-BQ --baseq <baseq> base quality threshold for a base to be used for counting [default: 13]
-CHR --chrom <chrom> the selected chromsome (whole genome if not supplied,separate by comma if multiple chroms)
-CHR --chrom <chrom>
the selected chromsome (whole genome if not supplied,separate by comma if multiple chroms)
-minDP --minDP <minDP> the minimum DP for a SNP to be included in the output file [default: 1]
-minDP --minDP <minDP>
the minimum DP for a SNP to be included in the output file [default: 1]
-maxDP --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 5]
-maxDP --maxDP <maxDP>
the maximum DP for a SNP to be included in the output file [default: 5]
-maxTotalDP --maxTotalDP <maxTotalDP> the maximum DP across all barcodes for a SNP to be included in the output file [default:
25
]
-maxTotalDP --maxTotalDP <maxTotalDP>
the maximum DP across all barcodes for a SNP to be included in the output file [default:
30
]
-minTotalDP --minTotalDP <minTotalDP> the minimum DP across all barcodes for a SNP to be included in the output file [default: 10]
-minTotalDP --minTotalDP <minTotalDP>
the minimum DP across all barcodes for a SNP to be included in the output file [default: 10]
-chrName --chrName
<chrName>
the chr names with chr prefix or not, if not supplied then no prefix
-chrName --chrName the chr names with chr prefix or not, if not supplied then no prefix
-thetaREF --thetaREF <thetaREF> the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
-thetaREF --thetaREF <thetaREF>
the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
-thetaALT --thetaALT <thetaALT> the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
-thetaALT --thetaALT <thetaALT>
the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
-cmPmb --cmPmb <cmPmb> the average centiMorgan distances per megabases default 0.1 cm per Mb [default 0.1]
-cmPmb --cmPmb <cmPmb>
the average centiMorgan distances per megabases default 0.1 cm per Mb [default 0.1]
-h --help show help
-h --help show help
Examples
Examples
...
@@ -462,65 +210,27 @@ Options:
...
@@ -462,65 +210,27 @@ Options:
thetaALT
:
float
thetaALT
:
float
cmPmb
:
float
cmPmb
:
float
barcodeTag
=
"CB"
barcodeTag
=
"CB"
threads
=
parse_int
(
$
args
[
"--threads"
]
)
barcodeTag
=
$
args
[
"--barcodeTag"
]
mindp
=
parse_int
(
$
args
[
"--minDP"
]
)
maxdp
=
parse_int
(
$
args
[
"--maxDP"
]
)
maxtotal
=
parse_int
(
$
args
[
"--maxTotalDP"
]
)
mintotal
=
parse_int
(
$
args
[
"--minTotalDP"
]
)
bamfile
=
$
args
[
"<BAM>"
]
barcodeFile
=
$
args
[
"<barcodeFile>"
]
out_dir
=
$
args
[
"<out_prefix>"
]
vcff
=
$
args
[
"<VCF>"
]
mapq
=
parse_int
(
$
args
[
"--minMAPQ"
]
)
minbsq
=
parse_int
(
$
args
[
"--baseq"
]
)
thetaRef
=
parse_float
(
$
args
[
"--thetaREF"
]
)
thetaAlt
=
parse_float
(
$
args
[
"--thetaALT"
]
)
cmPmb
=
parse_float
(
$
args
[
"--cmPmb"
]
)
if
(
$
args
[
"--threads"
]
!=
"nil"
):
threads
=
parse_int
(
$
args
[
"--threads"
]
)
if
(
$
args
[
"--cellbarcode"
]
!=
"nil"
):
barcodeTag
=
$
args
[
"--barcodeTag"
]
else
:
threads
=
4
if
(
$
args
[
"--minDP"
]
!=
"nil"
):
mindp
=
parse_int
(
$
args
[
"--minDP"
]
)
else
:
mindp
=
1
if
(
$
args
[
"--maxDP"
]
!=
"nil"
):
maxdp
=
parse_int
(
$
args
[
"--maxDP"
]
)
else
:
maxdp
=
10
if
(
$
args
[
"--maxTotalDP"
]
!=
"nil"
):
maxtotal
=
parse_int
(
$
args
[
"--maxTotalDP"
]
)
else
:
maxtotal
=
30
if
(
$
args
[
"--minTotalDP"
]
!=
"nil"
):
mintotal
=
parse_int
(
$
args
[
"--minTotalDP"
]
)
else
:
mintotal
=
10
if
(
$
args
[
"<BAM>"
]
==
"nil"
):
quit
"input bam is required"
else
:
bamfile
=
$
args
[
"<BAM>"
]
if
(
$
args
[
"<barcodeFile>"
]
==
"nil"
):
quit
"input barcodeFile is required"
else
:
barcodeFile
=
$
args
[
"<barcodeFile>"
]
if
(
$
args
[
"<out_prefix>"
]
==
"nil"
):
quit
"output prefix is required"
else
:
out_dir
=
$
args
[
"<out_prefix>"
]
if
(
$
args
[
"<VCF>"
]
==
"nil"
):
quit
"input VCF file is required"
else
:
vcff
=
$
args
[
"<VCF>"
]
if
(
$
args
[
"--minMAPQ"
]
!=
"nil"
):
mapq
=
parse_int
(
$
args
[
"--minMAPQ"
]
)
else
:
mapq
=
20
if
(
$
args
[
"--baseq"
]
!=
"nil"
):
minbsq
=
parse_int
(
$
args
[
"--baseq"
]
)
else
:
minbsq
=
13
if
(
$
args
[
"--thetaREF"
]
!=
"nil"
):
thetaRef
=
parse_float
(
$
args
[
"--thetaREF"
]
)
else
:
thetaRef
=
0.1
if
(
$
args
[
"--thetaALT"
]
!=
"nil"
):
thetaAlt
=
parse_float
(
$
args
[
"--thetaALT"
]
)
else
:
thetaAlt
=
0.9
if
(
$
args
[
"--cmPmb"
]
!=
"nil"
):
cmPmb
=
parse_float
(
$
args
[
"--cmPmb"
]
)
else
:
cmPmb
=
0.1
if
(
$
args
[
"--chrom"
]
!=
"nil"
):
if
(
$
args
[
"--chrom"
]
!=
"nil"
):
selectedChrs
=
$
args
[
"--chrom"
]
selectedChrs
=
$
args
[
"--chrom"
]
else
:
else
:
let
a
=
toSeq
(
1
..
19
)
let
a
=
toSeq
(
1
..
19
)
if
(
$
args
[
"--chrName"
]
==
"nil"
):
if
(
not
args
[
"--chrName"
]
):
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
""
&
$
x
)
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
""
&
$
x
)
let
all_Chrs
=
concat
(
b
,
@[
"X"
,
"Y"
]
)
let
all_Chrs
=
concat
(
b
,
@[
"X"
,
"Y"
]
)
selectedChrs
=
all_Chrs
.
join
(
","
)
selectedChrs
=
all_Chrs
.
join
(
","
)
...
@@ -528,11 +238,8 @@ Options:
...
@@ -528,11 +238,8 @@ Options:
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
"chr"
&
$
x
)
let
b
=
map
(
a
,
proc
(
x
:
int
):
string
=
"chr"
&
$
x
)
let
all_Chrs
=
concat
(
b
,
@[
"chrX"
,
"chrY"
]
)
let
all_Chrs
=
concat
(
b
,
@[
"chrX"
,
"chrY"
]
)
selectedChrs
=
all_Chrs
.
join
(
","
)
selectedChrs
=
all_Chrs
.
join
(
","
)
let
s_Chrs
=
selectedChrs
.
split
(
','
)
let
s_Chrs
=
selectedChrs
.
split
(
','
)
#echo $s_Chrs
#var args = commandLineParams()
discard
sscocaller
(
threads
,
vcff
,
barcodeFile
,
bamfile
,
discard
sscocaller
(
threads
,
vcff
,
barcodeFile
,
bamfile
,
out_dir
,
mapq
,
minbsq
,
mintotal
,
out_dir
,
mapq
,
minbsq
,
mintotal
,
maxtotal
,
mindp
,
maxdp
,
thetaREF
,
thetaALT
,
cmPmb
,
s_Chrs
,
barcodeTag
)
maxtotal
,
mindp
,
maxdp
,
thetaREF
,
thetaALT
,
cmPmb
,
s_Chrs
,
barcodeTag
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment