From d9e0289722aaa96365b77da2b0270aeea1b05bb0 Mon Sep 17 00:00:00 2001
From: rlyu <rlyu@svi.edu.au>
Date: Wed, 26 May 2021 16:40:45 +1000
Subject: [PATCH] add bulk support

---
 src/sscocaller.nim | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/src/sscocaller.nim b/src/sscocaller.nim
index 50201b1..9471c6b 100755
--- a/src/sscocaller.nim
+++ b/src/sscocaller.nim
@@ -21,9 +21,6 @@ type
     cref: int
     calt: int
 type 
-  AlleleExpr = object
-    cRef: int
-    cAlt: int
   # Nucleotide = enum
   #   A, C, T, G 
   ViState = enum
@@ -42,16 +39,9 @@ type
     ## look up from current Sperm SNP index to SNPIndex  
     spermSnpIndexLookUp: Table[int,int]
   SeqSpermViNodes = seq[SpermViNodes]
-#var alexprSeq:seq[AlleleExpr]      
 
 proc inc_count_cref(a:allele_expr) = inc(a.cref)
 proc inc_count_calt(a:allele_expr) = inc(a.calt)
-proc reset_count(a:allele_expr) = 
-  a.calt = 0
-  a.cref = 0
-proc tostring(a:allele_expr, s:var string) {.inline.} =
-  s.set_len(0)
-  s.add("\t" & $a.cref & "," & $a.calt)
 
 proc getEmission(thetaRef=0.1,thetaAlt=0.9,
                   cRef:int,cAlt:int): array[stateRef..stateAlt, float] =
@@ -69,7 +59,7 @@ proc getTrans(pos1:int64,pos2:int64,cmPmb=0.1): float =
 proc countAllele(rec:Variant, ibam:Bam, maxTotalReads:int,
                   minTotalReads:int,
                   chrom:string, mapq: int,
-                  barcodeTable:OrderedTableRef,minbsq:int): Table[string,allele_expr] =
+                  barcodeTable:OrderedTableRef,minbsq:int,bulkBam:bool): Table[string,allele_expr] =
   var alleleCountTable = initTable[string,allele_expr]()
   var rec_alt:char
   var total_reads=0
@@ -156,6 +146,7 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
   kstr.m = 0
   kstr.s = nil
   var ithSperm=0
+  var bulkBam = false
   ## initiate the table with CB as has keys, allele counts (ref object) as elements
   while hts_getline(hf, cint(10), addr kstr) > 0:
     if $kstr.s[0] == "#":
@@ -163,12 +154,14 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
     var v = $kstr.s
     discard barcodeTable.hasKeyOrPut(v, ithSperm)
     ithSperm+=1 
+  if barcodeTable.len == 1 and barcodeTable.hasKey("bulk"):
+    echo "running in bulk mode and all reads are regarded as from one sample/cell"
+    bulkBam = true
   let initProb:array[stateRef..stateAlt, float]=[0.5,0.5]
   ## iterate through each selected chromosome
   for chrom in s_Chrs:
     # var scTable = initTable[string,SingleSpermRec]()
     var snpIndex = 0
-    var spermIndex = 0
     ## number of non zeros
     var nnsize = 0
     var scSpermSeq:SeqSpermViNodes
@@ -208,7 +201,8 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
                                          barcodeTable=barcodeTable,
                                          minbsq=minbsq,
                                          maxTotalReads = maxtotal,
-                                         minTotalReads = mintotal)
+                                         minTotalReads = mintotal,
+                                         bulkBam = bulkBam)
       if alleleCountTable.len==0: continue
 
       var rec_alt:char
@@ -410,7 +404,7 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string,
   return 0
 
 when(isMainModule):
-  let version = "0.1.0"
+  let version = "0.2.0"
   var doc = format("""
   $version
 
@@ -444,7 +438,7 @@ Options:
   -h --help  show help
 
   Examples
-      ./sscocaller --threads 10 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp-
+      ./sscocaller --threads 10 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp
 """ % ["version", version])
 
   let args = docopt(doc, version=version)
-- 
GitLab