sscocaller.nim 23 KB
Newer Older
Ruqian Lyu's avatar
Ruqian Lyu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
## Process the large bam that contains reads from all barcodes and get allele counts
## for the list of SNPs provided in a VCF file
## Generate per cell per chr Allele counts
## Runs viterbi algorithm and output sequence of inferred viterbi state
## Authror: Ruqian Lyu
## Date: 04/11/2020


import os
import docopt
import strutils
import hts
import tables
import sequtils
import math
Ruqian Lyu's avatar
Ruqian Lyu committed
16
from distributions/rmath import dbinom
Ruqian Lyu's avatar
Ruqian Lyu committed
17
18
import streams

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
type 
  allele_expr = ref object
    cref: int
    calt: int
type 
  # Nucleotide = enum
  #   A, C, T, G 
  ViState = enum
    stateRef, stateAlt, stateN
  ViNode = object
    pos: int
    cRef: int
    cAlt: int
    pathScore: array[stateRef..stateAlt, float]
    pathState: array[stateRef..stateAlt, ViState]      
    state: ViState     
  SpermViNodes = object
    viNodeseq: seq[ViNode]
    ## look up from SNPIndex to current Sperm SNP index
    snpIndexLookUp: Table[int,int]
    ## look up from current Sperm SNP index to SNPIndex  
    spermSnpIndexLookUp: Table[int,int]
  SeqSpermViNodes = seq[SpermViNodes]
Ruqian Lyu's avatar
Ruqian Lyu committed
42

43
44
proc inc_count_cref(a:allele_expr) = inc(a.cref)
proc inc_count_calt(a:allele_expr) = inc(a.calt)
Ruqian Lyu's avatar
Ruqian Lyu committed
45

46
47
proc getEmission(thetaRef=0.1,thetaAlt=0.9,
                  cRef:int,cAlt:int): array[stateRef..stateAlt, float] =
Ruqian Lyu's avatar
Ruqian Lyu committed
48
  
49
50
51
  var emissionScore = [dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaRef,log=true),
                        dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaAlt,log=true)]
  return emissionScore
Ruqian Lyu's avatar
Ruqian Lyu committed
52

53
54
55
56
57
58
59
60
61
proc getTrans(pos1:int64,pos2:int64,cmPmb=0.1): float = 
  if pos2<pos1:
    quit "Wrong order of snps"
  var rec = 1-math.exp(-float(pos2-pos1)*1e-8*cmPmb) # for autosomes 1*cmPbm cM per Mb 
  return rec
                   #   
proc countAllele(rec:Variant, ibam:Bam, maxTotalReads:int,
                  minTotalReads:int,
                  chrom:string, mapq: int,
Ruqian Lyu's avatar
Ruqian Lyu committed
62
                  barcodeTable:OrderedTableRef,minbsq:int,bulkBam:bool,barcodeTag:string): Table[string,allele_expr] =
63
64
65
66
67
  var alleleCountTable = initTable[string,allele_expr]()
  var rec_alt:char
  var total_reads=0
  rec_alt = rec.ALT[0][0]
  for aln in ibam.query(chrom = chrom,start = rec.POS.cint-1, stop = rec.POS.cint):
Ruqian Lyu's avatar
Ruqian Lyu committed
68
    var cbt = tag[string](aln, barcodeTag)
Ruqian Lyu's avatar
Ruqian Lyu committed
69
    var currentCB: string
70
71
    if cbt.isNone:
    #  echo "no cb "
Ruqian Lyu's avatar
Ruqian Lyu committed
72
73
74
75
76
77
      if not bulkBam:
        continue  
      else:
        currentCB = "bulk"
    else:
      currentCB = cbt.get  
78
79
80
    if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue
    ## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
    ## if not aln.flag.proper_pair: continue
Ruqian Lyu's avatar
Ruqian Lyu committed
81
    if not barcodeTable.hasKey(currentCB): 
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
      continue
    var 
      off = aln.start.int
      qoff = 0
      roff_only = 0
      base = 'n'
      position = rec.POS.cint
      over = 0
    for event in aln.cigar:
      var cons = event.consumes
      #echo $cons
      if cons.query:
        qoff+=event.len ## the offs to the query sequences
      if cons.reference:
        off += event.len ## the offs to the reference sequences
        if not cons.query:
          roff_only += event.len
      if off <= position:
        continue
      over = off - position
      # get the base 
      base = aln.base_at(qoff - over-1)
      break
    if aln.base_quality_at(qoff - over-1).cint < minbsq: 
      continue
    total_reads+=1
Ruqian Lyu's avatar
Ruqian Lyu committed
108
    if alleleCountTable.hasKey(currentCB):
109
      if aln.base_at(qoff - over-1) == rec.REF[0]: 
Ruqian Lyu's avatar
Ruqian Lyu committed
110
        alleleCountTable[currentCB].inc_count_cref
111
112
        continue
      if aln.base_at(qoff - over-1) == rec_alt: 
Ruqian Lyu's avatar
Ruqian Lyu committed
113
        alleleCountTable[currentCB].inc_count_calt
114
115
116
        continue
    else:
      var new_snp = allele_expr(cref:0,calt:0)
Ruqian Lyu's avatar
Ruqian Lyu committed
117
      alleleCountTable[currentCB] = new_snp
118
      if aln.base_at(qoff - over-1) == rec.REF[0]: 
Ruqian Lyu's avatar
Ruqian Lyu committed
119
        alleleCountTable[currentCB].inc_count_cref
120
121
        continue
      if aln.base_at(qoff - over-1) == rec_alt: 
Ruqian Lyu's avatar
Ruqian Lyu committed
122
        alleleCountTable[currentCB].inc_count_calt
123
124
125
126
127
128
129
        continue
  if total_reads > maxTotalReads or total_reads < minTotalReads:
    return initTable[string,allele_expr]()
  return alleleCountTable
proc sscocaller(threads:int, vcff:string, barcodeFile:string,
                bamfile:string, out_dir:string, mapq:int, 
                minbsq:int, mintotal:int, maxtotal:int, mindp:int, maxdp:int, 
Ruqian Lyu's avatar
Ruqian Lyu committed
130
                thetaREF:float, thetaALT:float, cmPmb:float,s_Chrs:seq,barcodeTag:string): int =
Ruqian Lyu's avatar
Ruqian Lyu committed
131

132
133
134
  var 
    ibam:Bam
    ivcf:VCF
Ruqian Lyu's avatar
Ruqian Lyu committed
135
136
137
138
139
140
141
142
143
144
145
146
147
148
  if not open(ibam, bamfile, threads=threads, index = true):
      quit "couldn't open input bam"
  if not open(ivcf, vcff, threads=threads):
      quit "couldn't open: vcff"

  var hf = hts.hts_open(cstring(barcodeFile), "r")
  #### TODO : Table size
  var barcodeTable =  newOrderedTable[string,int](initialSize = 1024)
  # var outFileTable = initTable[string,File]()
  var kstr: hts.kstring_t
  kstr.l = 0
  kstr.m = 0
  kstr.s = nil
  var ithSperm=0
Ruqian Lyu's avatar
Ruqian Lyu committed
149
  var bulkBam = false
Ruqian Lyu's avatar
Ruqian Lyu committed
150
151
152
153
154
155
  ## initiate the table with CB as has keys, allele counts (ref object) as elements
  while hts_getline(hf, cint(10), addr kstr) > 0:
    if $kstr.s[0] == "#":
      continue
    var v = $kstr.s
    discard barcodeTable.hasKeyOrPut(v, ithSperm)
156
    ithSperm+=1 
Ruqian Lyu's avatar
Ruqian Lyu committed
157
158
159
  if barcodeTable.len == 1 and barcodeTable.hasKey("bulk"):
    echo "running in bulk mode and all reads are regarded as from one sample/cell"
    bulkBam = true
Ruqian Lyu's avatar
Ruqian Lyu committed
160
161
162
163
164
165
166
  let initProb:array[stateRef..stateAlt, float]=[0.5,0.5]
  ## iterate through each selected chromosome
  for chrom in s_Chrs:
    # var scTable = initTable[string,SingleSpermRec]()
    var snpIndex = 0
    ## number of non zeros
    var nnsize = 0
167
    var scSpermSeq:SeqSpermViNodes
Ruqian Lyu's avatar
Ruqian Lyu committed
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
    ## matches with the order in barcodeTable
    scSpermSeq.setLen(barcodeTable.len)
    var outFileSNPanno:FileStream
    var outFileTotalCountMtx:FileStream
    var outFileAltCountMtx:FileStream
    # var outHeaderMtx:FileStream
    var outFileVStateMtx:FileStream
    var viSegmentInfo: FileStream
    try:
      outFileSNPanno = openFileStream(out_dir & chrom & "_snpAnnot.txt", fmWrite)
      outFileTotalCountMtx = openFileStream(out_dir & chrom & "_totalCount.mtx", fmWrite)
      outFileAltCountMtx = openFileStream(out_dir & chrom & "_altCount.mtx", fmWrite)
      outFileVStateMtx = openFileStream(out_dir & chrom & "_vi.mtx", fmWrite)
      viSegmentInfo = openFileStream(out_dir & chrom & "_viSegInfo.txt", fmWrite)
      #echo strm.readLine()
    except:
      stderr.write getCurrentExceptionMsg()
    
    ## write headers to those mtx files and the first line place holder for total_row total_column total_entry
    outFileTotalCountMtx.writeLine("%%MatrixMarket matrix coordinate integer general")
    outFileTotalCountMtx.writeLine(' '.repeat(50))
    outFileAltCountMtx.writeLine("%%MatrixMarket matrix coordinate integer general")
    outFileAltCountMtx.writeLine(' '.repeat(50))
    outFileVStateMtx.writeLine("%%MatrixMarket matrix coordinate integer general")
    outFileVStateMtx.writeLine(' '.repeat(50))

    outFileSNPanno.writeLine("POS" & "\t" & "REF" & "\t" & "ALT" )
    for rec in ivcf.query(chrom):
      if rec.ALT.len > 1: continue
      if rec.ALT.len == 0: continue
      ## alleleCountTable contains for this SNP.POS, each cell barcode's allele counts 
      var alleleCountTable = countAllele(rec=rec, ibam=ibam, 
                                         chrom=chrom, mapq=mapq, 
                                         barcodeTable=barcodeTable,
Ruqian Lyu's avatar
Ruqian Lyu committed
202
203
                                         minbsq=minbsq,
                                         maxTotalReads = maxtotal,
Ruqian Lyu's avatar
Ruqian Lyu committed
204
                                         minTotalReads = mintotal,
Ruqian Lyu's avatar
Ruqian Lyu committed
205
206
                                         bulkBam = bulkBam,
                                         barcodeTag = barcodeTag)
Ruqian Lyu's avatar
Ruqian Lyu committed
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
      if alleleCountTable.len==0: continue

      var rec_alt:char
      rec_alt = rec.ALT[0][0]
      # out_line = $rec.CHROM & "\t" & $rec.POS & "\t" & rec.REF[0] & "\t" & rec_alt
      ## add to snpAnnoSeq, later write to SNPannot file, which contains SNP.pos, SNP.ref,SNP.alt; The rowAnnotations
      snpIndex += 1
      outFileSNPanno.writeLine($rec.POS & "\t" & $rec.REF[0] & "\t" & $rec_alt )
      for bc, ac in alleleCountTable.mpairs:
        # ac.tostring(acs)
        ## mindp, maxdp, they are values per cell
        if (ac.cref+ac.calt) <= mindp or (ac.cref+ac.calt) >= maxdp: continue        
        var ithSperm = barcodeTable[bc]
        ## write to mtx Ref count
        outFileTotalCountMtx.writeLine($snpIndex & " " & $(ithSperm+1) & " " & $(ac.cRef+ac.cAlt))
        outFileAltCountMtx.writeLine($snpIndex & " " & $(ithSperm+1) & " " & $ac.cAlt)
        nnsize += 1
        var emissionArray = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,cRef=ac.cRef,cAlt=ac.cAlt)
        if scSpermSeq[ithSperm].viNodeseq.len==0:
          var currentViNode = ViNode()
Ruqian Lyu's avatar
Ruqian Lyu committed
227
228
          currentViNode.pathScore[stateRef] = math.ln(initProb[stateRef])+emissionArray[stateRef]
          currentViNode.pathScore[stateAlt] = math.ln(initProb[stateAlt])+emissionArray[stateAlt]
Ruqian Lyu's avatar
Ruqian Lyu committed
229
230
231
232
233
234
235
236
237
238
239
240
          currentViNode.pathState[stateRef] = stateN
          currentViNode.pathState[stateAlt] = stateN
          currentViNode.state = stateN
          currentViNode.pos = int(rec.POS)
          currentViNode.cAlt = int(ac.cAlt)
          currentViNode.cRef = int(ac.cRef)
          
          scSpermSeq[ithSperm].viNodeseq.add(currentViNode)
          scSpermSeq[ithSperm].snpIndexLookUp[snpIndex] = scSpermSeq[ithSperm].viNodeseq.len
          scSpermSeq[ithSperm].spermSnpIndexLookUp[scSpermSeq[ithSperm].viNodeseq.len] = snpIndex
        else:
          let preVNode = scSpermSeq[ithSperm].viNodeseq[high(scSpermSeq[ithSperm].viNodeseq)]
Ruqian Lyu's avatar
Ruqian Lyu committed
241
242
          var ltransProb = math.ln(getTrans(preVNode.pos,int(rec.POS),cmPmb=cmPmb))
          var lnoTransProb = math.ln(1-getTrans(preVNode.pos,int(rec.POS),cmPmb=cmPmb))
Ruqian Lyu's avatar
Ruqian Lyu committed
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
          var currentViNode = ViNode()
            # ref/alt -> ref
          var refTref = preVNode.pathScore[stateRef] + lnoTransProb
          var altTref = preVNode.pathScore[stateAlt] + ltransProb
          if refTref > altTref:
            currentViNode.pathScore[stateRef] = refTref
            currentViNode.pathState[stateRef] = stateRef
          else:
            currentViNode.pathScore[stateRef] = altTref
            currentViNode.pathState[stateRef] = stateAlt
            # ref/alt -> alt
          var refTalt = preVNode.pathScore[stateRef] + ltransProb
          var altTalt = preVNode.pathScore[stateAlt] + lnoTransProb
            
          if refTalt > altTalt:
            currentViNode.pathScore[stateAlt] = refTalt
            currentViNode.pathState[stateAlt] = stateRef
          else:
            currentViNode.pathScore[stateAlt] = altTalt
            currentViNode.pathState[stateAlt] = stateAlt
          currentViNode.pathScore[stateAlt] += emissionArray[stateAlt]
          currentViNode.pathScore[stateRef] += emissionArray[stateRef]
          currentViNode.cAlt = int(ac.cAlt)
          currentViNode.cRef = int(ac.cRef)
          currentViNode.pos = int(rec.POS)
          scSpermSeq[ithSperm].viNodeseq.add(currentViNode)
          scSpermSeq[ithSperm].snpIndexLookUp[snpIndex] = scSpermSeq[ithSperm].viNodeseq.len
          scSpermSeq[ithSperm].spermSnpIndexLookUp[scSpermSeq[ithSperm].viNodeseq.len] = snpIndex
    # The size line  
    
    var transitFlag = false
    var cSNP = 0
    var posEnd,posStart:int64
    var inferProb,reverseProb,transProb = 0.0
    var currentEm,prevEm:  array[stateRef..stateAlt, float]
    var lastNode: ViNode
    var spermVNseq: SpermViNodes
    var ithSNP: int
    ## trans/no trans
    var rightGap,leftGap: array[0..1, float]
    for ithSperm in 0..(scSpermSeq.len-1):
      ## rightGap,leftGap = [0.0,0.0]
      spermVNseq = scSpermSeq[ithSperm]
      if spermVNseq.viNodeseq.len==0: continue
      lastNode = spermVNseq.viNodeseq[high(spermVNseq.viNodeseq)]
      currentEm = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,
                              cRef=lastNode.cRef,
                              cAlt=lastNode.cAlt)
      if lastNode.pathScore[stateRef] > lastNode.pathScore[stateAlt]:
        scSpermSeq[ithSperm].viNodeseq[high(scSpermSeq[ithSperm].viNodeseq)].state = stateRef
        ithSNP = scSpermSeq[ithSperm].spermSnpIndexLookUp[high(scSpermSeq[ithSperm].viNodeseq)+1]
        outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 1")
        inferProb  = currentEm[stateRef]
        reverseProb = currentEm[stateAlt]
      else:
        scSpermSeq[ithSperm].viNodeseq[high(scSpermSeq[ithSperm].viNodeseq)].state = stateAlt
        ithSNP = scSpermSeq[ithSperm].spermSnpIndexLookUp[high(scSpermSeq[ithSperm].viNodeseq)+1]
        outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 2")
        inferProb  = currentEm[stateAlt]
        reverseProb = currentEm[stateRef]
      posEnd = lastNode.pos
      cSNP = 1
      ## traceback for yielding the most probable state sequence                 
      for i in 1..high(scSpermSeq[ithSperm].viNodeseq):
        var state =  scSpermSeq[ithSperm].viNodeseq[^i].state
        scSpermSeq[ithSperm].viNodeseq[^(i+1)].state = scSpermSeq[ithSperm].viNodeseq[^i].pathState[state]
        prevEm = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,
                             cRef=scSpermSeq[ithSperm].viNodeseq[^(i+1)].cRef,
                             cAlt=scSpermSeq[ithSperm].viNodeseq[^(i+1)].cAlt)        
        ithSNP = scSpermSeq[ithSperm].spermSnpIndexLookUp[high(scSpermSeq[ithSperm].viNodeseq)-i+1]
        transProb = getTrans(scSpermSeq[ithSperm].viNodeseq[^(i+1)].pos,
                                 scSpermSeq[ithSperm].viNodeseq[^(i)].pos,
                                 cmPmb=cmPmb)
        if scSpermSeq[ithSperm].viNodeseq[^(i+1)].state == stateRef:
          outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 1")
          if state == stateRef:
             # not transitioning to a different state ie still in this segment of same state
            inferProb+=prevEm[stateRef]
            reverseProb+=prevEm[stateAlt]
            cSNP += 1
            transitFlag = false
            #posStart = scSpermSeq[ithSperm].viNodeseq[^(i+1)].pos
          else: # there is transition to different state: ref(start) to alt(end) now output the segment info
            posStart = scSpermSeq[ithSperm].viNodeseq[^(i)].pos
            #leftGapSize = scSpermSeq[ithSperm].viNodeseq[^(i)].pos - scSpermSeq[ithSperm].viNodeseq[^(i+1)].pos
Ruqian Lyu's avatar
Ruqian Lyu committed
328
            leftGap=[math.ln(transProb),math.ln(1-transProb)]
Ruqian Lyu's avatar
Ruqian Lyu committed
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
            inferProb+=leftGap[0]
            reverseProb+=leftGap[1]
            viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2")
            transitFlag = true
            cSNP = 1
            rightGap = leftGap
            inferProb = prevEm[stateRef]+rightGap[0]
            reverseProb = prevEm[stateAlt]+rightGap[1]
            posEnd = scSpermSeq[ithSperm].viNodeseq[^(i+1)].pos
        else:
          outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 2")
          if state == stateAlt:
             # not transitioning to a different state ie still in this segment of same state
            inferProb += prevEm[stateAlt]
            reverseProb += prevEm[stateRef]
            cSNP += 1
            transitFlag = false
          else:
            ## state transit
            posStart = scSpermSeq[ithSperm].viNodeseq[^(i)].pos
Ruqian Lyu's avatar
Ruqian Lyu committed
349
            leftGap=[math.ln(transProb),math.ln(1-transProb)]
Ruqian Lyu's avatar
Ruqian Lyu committed
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
            inferProb += leftGap[0]
            reverseProb += leftGap[1]
            viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1" )
            transitFlag = true
            cSNP = 1
            rightGap = leftGap
            inferProb = prevEm[stateAlt]+rightGap[0]
            reverseProb = prevEm[stateRef]+rightGap[1]
            posEnd = scSpermSeq[ithSperm].viNodeseq[^(i+1)].pos
      ## traced to the start position of the chromosome for this cell 
      #leftGap = [0.0,0.0]
      if not transitFlag:
        ## the first SNP is included in the segment from traced from back
        posStart = scSpermSeq[ithSperm].viNodeseq[0].pos
        if scSpermSeq[ithSperm].viNodeseq[0].state == stateRef:
          viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1" )
        else:
          viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2" )
      else:
        ## the first node has different state from the second, the first node has its own segment
        posStart = scSpermSeq[ithSperm].viNodeseq[0].pos
        posEnd = posStart
        cSNP = 1
        currentEm = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,
                                cRef=scSpermSeq[ithSperm].viNodeseq[0].cRef,
                                cAlt=scSpermSeq[ithSperm].viNodeseq[0].cAlt) 
        transProb = getTrans(scSpermSeq[ithSperm].viNodeseq[0].pos,
                                 scSpermSeq[ithSperm].viNodeseq[1].pos,
                                 cmPmb=cmPmb)
        if scSpermSeq[ithSperm].viNodeseq[0].state == stateRef:
Ruqian Lyu's avatar
Ruqian Lyu committed
380
381
          inferProb = currentEm[stateRef]+math.ln(transProb)
          reverseProb = currentEm[stateAlt]+math.ln(1-transProb)
Ruqian Lyu's avatar
Ruqian Lyu committed
382
383
          viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1" )
        else:
Ruqian Lyu's avatar
Ruqian Lyu committed
384
385
          inferProb = currentEm[stateAlt]+math.ln(transProb)
          reverseProb = currentEm[stateRef]+math.ln(1-transProb)
Ruqian Lyu's avatar
Ruqian Lyu committed
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
          viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2" )
      transitFlag = false
    outFileTotalCountMtx.setPosition(49)
    outFileVStateMtx.setPosition(49)
    outFileAltCountMtx.setPosition(49)
    outFileAltCountMtx.write($snpIndex & " " & $barcodeTable.len & " " & $nnsize)
    outFileVStateMtx.write($snpIndex & " " & $barcodeTable.len & " " & $nnsize)
    outFileTotalCountMtx.write($snpIndex & " " & $barcodeTable.len & " " & $nnsize)
    
    # outHeaderMtx.writeLine() 
    outFileSNPanno.close()
    outFileTotalCountMtx.close()
    outFileAltCountMtx.close()
    # outHeaderMtx.close()
    outFileVStateMtx.close()
    viSegmentInfo.close()
  ibam.close()
  ivcf.close()

  return 0
406
407

when(isMainModule):
Ruqian Lyu's avatar
Ruqian Lyu committed
408
  let version = "0.2.1"
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
  var doc = format("""
  $version

  Usage:
      sscocaller [options] <BAM> <VCF> <barcodeFile> <out_prefix>

Arguments:

  <BAM> the read alignment file with records of single-cell DNA reads
  
  <VCF> the variant call file with records of SNPs

  <barcodeFile> the text file containing the list of cell barcodes

  <out_prefix>  the prefix of output files


Options:
  -t --threads <threads> number of BAM decompression threads [default: 4]
Ruqian Lyu's avatar
Ruqian Lyu committed
428
  -cb --cellbarcode <cellbarcode> the cell barcode tag, by default it is CB
429
430
431
432
433
434
435
436
437
438
439
  --minMAPQ <mapq> Minimum MAPQ for read filtering [default: 20]
  --baseq <baseq>  base quality threshold for a base to be used for counting [default: 13]
  --chrom <chrom> the selected chromsome (whole genome if not supplied,separate by comma if multiple chroms)
  --minDP <minDP> the minimum DP for a SNP to be included in the output file [default: 1]
  --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 5]
  --maxTotalDP <maxTotalDP> the maximum DP across all barcodes for a SNP to be included in the output file [default: 25]
  --minTotalDP <minTotalDP> the minimum DP across all barcodes for a SNP to be included in the output file [default: 10]          
  --chrName <chrName> the chr names with chr prefix or not, if not supplied then no prefix
  --thetaREF <thetaREF> the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
  --thetaALT <thetaALT> the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
  --cmPmb <cmPmb> the average centiMorgan distances per megabases default 0.1 cm per Mb [default 0.1]
440
441
442
  -h --help  show help

  Examples
Ruqian Lyu's avatar
Ruqian Lyu committed
443
      ./sscocaller --threads 10 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
""" % ["version", version])

  let args = docopt(doc, version=version)
  
  var
    threads:int
    vcff:string
    barcodeFile:string
    bamfile:string
    selectedChrs:string
    out_dir:string
    mapq:int
    minbsq:int
    mintotal:int
    maxtotal:int
    mindp:int
    maxdp:int
    thetaREF:float
    thetaALT:float
    cmPmb:float
Ruqian Lyu's avatar
Ruqian Lyu committed
464
    barcodeTag="CB"
465
466
467

  if($args["--threads"] != "nil"):
      threads = parse_int($args["--threads"])
Ruqian Lyu's avatar
Ruqian Lyu committed
468
  if($args["--cellbarcode"] != "nil"):
Ruqian Lyu's avatar
Ruqian Lyu committed
469
      barcodeTag = $args["--cellbarcode"]
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
  else: threads = 4
  if($args["--minDP"] != "nil"):
      mindp = parse_int($args["--minDP"])
  else: mindp = 1
  if($args["--maxDP"] != "nil"):
      maxdp = parse_int($args["--maxDP"])
  else: maxdp = 10

  if($args["--maxTotalDP"] != "nil"):
      maxtotal = parse_int($args["--maxTotalDP"])
  else: maxtotal = 30

  if($args["--minTotalDP"] != "nil"):
      mintotal = parse_int($args["--minTotalDP"])
  else: mintotal = 10

  if($args["<BAM>"] == "nil"):
    quit "input bam is required"
  else: bamfile = $args["<BAM>"]

  if($args["<barcodeFile>"] == "nil"):
    quit "input barcodeFile is required"
  else: barcodeFile = $args["<barcodeFile>"]

  if($args["<out_prefix>"] == "nil"):
    quit "output prefix is required"
  else: out_dir = $args["<out_prefix>"]

  if($args["<VCF>"] == "nil"):
    quit "input VCF file is required"
  else: vcff = $args["<VCF>"]

  if($args["--minMAPQ"] != "nil"):
    mapq = parse_int($args["--minMAPQ"])
  else: mapq = 20
  if($args["--baseq"] != "nil"):
    minbsq = parse_int($args["--baseq"])
  else: minbsq = 13
  if($args["--thetaREF"] != "nil"):
    thetaRef = parse_float($args["--thetaREF"])
  else: thetaRef = 0.1

  if($args["--thetaALT"] != "nil"):
    thetaAlt = parse_float($args["--thetaALT"])
  else: thetaAlt = 0.9
  
  if($args["--cmPmb"] != "nil"):
    cmPmb = parse_float($args["--cmPmb"])
  else: cmPmb = 0.1  
  if($args["--chrom"] != "nil"):
    selectedChrs = $args["--chrom"]
  else:
    let a = toSeq(1..19)
    if($args["--chrName"] == "nil"):
      let b = map(a, proc (x: int): string = "" & $x)
      let all_Chrs = concat(b,@["X","Y"])
      selectedChrs = all_Chrs.join(",")
    else: 
      let b = map(a, proc (x: int): string = "chr" & $x)
      let all_Chrs = concat(b,@["chrX","chrY"])
      selectedChrs = all_Chrs.join(",")
Ruqian Lyu's avatar
Ruqian Lyu committed
531
  
532
533
534
535
536
537
  let s_Chrs = selectedChrs.split(',')
  #echo $s_Chrs
  
  #var args = commandLineParams()
  discard sscocaller(threads, vcff, barcodeFile, bamfile,
                     out_dir, mapq, minbsq, mintotal, 
Ruqian Lyu's avatar
Ruqian Lyu committed
538
                     maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb,s_Chrs,barcodeTag)
539