From 737a0a3d03249cd9d51ca790dd0c3397474d858e Mon Sep 17 00:00:00 2001
From: rlyu <rlyu@svi.edu.au>
Date: Mon, 10 May 2021 16:51:25 +1000
Subject: [PATCH] add maxtotalDP mintotalDP options

---
 src/sscocaller.nim | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/sscocaller.nim b/src/sscocaller.nim
index a4f965c..51c77c9 100755
--- a/src/sscocaller.nim
+++ b/src/sscocaller.nim
@@ -31,7 +31,10 @@ proc sscocaller(argv: var seq[string]): int =
       -BQ --baseq <baseq>  base quality threshold for a base to be used for counting [default: 13]
       -CHR --chrom <chrom> the selected chromsome (whole genome if not supplied,separate by comma if multiple chroms)
       -minDP --minDP <minDP> the minimum DP for a SNP to be included in the output file [default: 1]
-      -maxDP --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 10]
+      -maxDP --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 5]
+      -maxTotalDP --maxTotalDP <maxTotalDP> the maximum DP across all barcodes for a SNP to be included in the output file [default: 25]
+      -minTotalDP --minTotalDP <minTotalDP> the minimum DP across all barcodes for a SNP to be included in the output file [default: 10]
+                             
       -chrName --chrName <chrName> the chr names with chr prefix or not, if not supplied then no prefix
       -thetaREF --thetaREF <thetaREF> the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
       -thetaALT --thetaALT <thetaALT> the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
@@ -55,6 +58,7 @@ proc sscocaller(argv: var seq[string]): int =
       mapq:int
       minbsq:int
       mintotal:int
+      maxtotal:int
       mindp:int
       maxdp:int
       thetaREF:float
@@ -85,11 +89,18 @@ proc sscocaller(argv: var seq[string]): int =
   if($args["--minDP"] != "nil"):
       mindp = parse_int($args["--minDP"])
   else: mindp = 1
-
   if($args["--maxDP"] != "nil"):
       maxdp = parse_int($args["--maxDP"])
   else: maxdp = 10
 
+  if($args["--maxTotalDP"] != "nil"):
+      maxtotal = parse_int($args["--maxTotalDP"])
+  else: maxtotal = 30
+
+  if($args["--minTotalDP"] != "nil"):
+      mintotal = parse_int($args["--minTotalDP"])
+  else: mintotal = 10
+
   if($args["<BAM>"] == "nil"):
     quit "input bam is required"
   else: bamfile = $args["<BAM>"]
@@ -203,12 +214,14 @@ proc sscocaller(argv: var seq[string]): int =
       quit "Wrong order of snps"
     var rec = 1-math.exp(-float(pos2-pos1)*1e-8*cmPmb) # for autosomes 1*cmPbm cM per Mb 
     return rec
-                   #maxTotalReads:int    
-  proc countAllele(rec:Variant, ibam:Bam, chrom:string, mapq: int,
+                   #   
+  proc countAllele(rec:Variant, ibam:Bam, maxTotalReads:int,
+                   minTotalReads:int,
+                   chrom:string, mapq: int,
                    barcodeTable:OrderedTableRef,minbsq:int): Table[string,allele_expr] =
     var alleleCountTable = initTable[string,allele_expr]()
     var rec_alt:char
-    # var total_reads=0
+    var total_reads=0
     rec_alt = rec.ALT[0][0]
     for aln in ibam.query(chrom = chrom,start = rec.POS.cint-1, stop = rec.POS.cint):
       if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue
@@ -238,6 +251,7 @@ proc sscocaller(argv: var seq[string]): int =
         base = aln.base_at(qoff - over)
         break
       if aln.base_quality_at(qoff - over-1).cint < minbsq: continue
+      total_reads+=1
       if alleleCountTable.hasKey(cbt.get):
         if aln.base_at(qoff - over-1) == rec.REF[0]: 
           alleleCountTable[cbt.get].inc_count_cref
@@ -254,10 +268,8 @@ proc sscocaller(argv: var seq[string]): int =
         if aln.base_at(qoff - over-1) == rec_alt: 
           alleleCountTable[cbt.get].inc_count_calt
           continue
-      # total_reads+=1
-    # if total_reads > maxTotalReads:
-    #   return initTable[string,allele_expr]()
-    #else: 
+    if total_reads > maxTotalReads or total_reads < minTotalReads:
+      return initTable[string,allele_expr]()
     return alleleCountTable
       
   ## iterate through each selected chromosome
@@ -304,7 +316,9 @@ proc sscocaller(argv: var seq[string]): int =
       var alleleCountTable = countAllele(rec=rec, ibam=ibam, 
                                          chrom=chrom, mapq=mapq, 
                                          barcodeTable=barcodeTable,
-                                         minbsq=minbsq)
+                                         minbsq=minbsq,
+                                         maxTotalReads = maxtotal,
+                                         minTotalReads = mintotal)
       if alleleCountTable.len==0: continue
 
       var rec_alt:char
-- 
GitLab