diff --git a/src/sscocaller.nim b/src/sscocaller.nim index a4f965c8fdce8a9cf09d0cd1e459365335ca58d7..51c77c9d0c502ca350416add0a6f9526b9fb9e36 100755 --- a/src/sscocaller.nim +++ b/src/sscocaller.nim @@ -31,7 +31,10 @@ proc sscocaller(argv: var seq[string]): int = -BQ --baseq <baseq> base quality threshold for a base to be used for counting [default: 13] -CHR --chrom <chrom> the selected chromsome (whole genome if not supplied,separate by comma if multiple chroms) -minDP --minDP <minDP> the minimum DP for a SNP to be included in the output file [default: 1] - -maxDP --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 10] + -maxDP --maxDP <maxDP> the maximum DP for a SNP to be included in the output file [default: 5] + -maxTotalDP --maxTotalDP <maxTotalDP> the maximum DP across all barcodes for a SNP to be included in the output file [default: 25] + -minTotalDP --minTotalDP <minTotalDP> the minimum DP across all barcodes for a SNP to be included in the output file [default: 10] + -chrName --chrName <chrName> the chr names with chr prefix or not, if not supplied then no prefix -thetaREF --thetaREF <thetaREF> the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1] -thetaALT --thetaALT <thetaALT> the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9] @@ -55,6 +58,7 @@ proc sscocaller(argv: var seq[string]): int = mapq:int minbsq:int mintotal:int + maxtotal:int mindp:int maxdp:int thetaREF:float @@ -85,11 +89,18 @@ proc sscocaller(argv: var seq[string]): int = if($args["--minDP"] != "nil"): mindp = parse_int($args["--minDP"]) else: mindp = 1 - if($args["--maxDP"] != "nil"): maxdp = parse_int($args["--maxDP"]) else: maxdp = 10 + if($args["--maxTotalDP"] != "nil"): + maxtotal = parse_int($args["--maxTotalDP"]) + else: maxtotal = 30 + + if($args["--minTotalDP"] != "nil"): + mintotal = parse_int($args["--minTotalDP"]) + else: mintotal = 10 + if($args["<BAM>"] == "nil"): quit "input bam is required" else: bamfile = $args["<BAM>"] @@ -203,12 +214,14 @@ proc sscocaller(argv: var seq[string]): int = quit "Wrong order of snps" var rec = 1-math.exp(-float(pos2-pos1)*1e-8*cmPmb) # for autosomes 1*cmPbm cM per Mb return rec - #maxTotalReads:int - proc countAllele(rec:Variant, ibam:Bam, chrom:string, mapq: int, + # + proc countAllele(rec:Variant, ibam:Bam, maxTotalReads:int, + minTotalReads:int, + chrom:string, mapq: int, barcodeTable:OrderedTableRef,minbsq:int): Table[string,allele_expr] = var alleleCountTable = initTable[string,allele_expr]() var rec_alt:char - # var total_reads=0 + var total_reads=0 rec_alt = rec.ALT[0][0] for aln in ibam.query(chrom = chrom,start = rec.POS.cint-1, stop = rec.POS.cint): if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue @@ -238,6 +251,7 @@ proc sscocaller(argv: var seq[string]): int = base = aln.base_at(qoff - over) break if aln.base_quality_at(qoff - over-1).cint < minbsq: continue + total_reads+=1 if alleleCountTable.hasKey(cbt.get): if aln.base_at(qoff - over-1) == rec.REF[0]: alleleCountTable[cbt.get].inc_count_cref @@ -254,10 +268,8 @@ proc sscocaller(argv: var seq[string]): int = if aln.base_at(qoff - over-1) == rec_alt: alleleCountTable[cbt.get].inc_count_calt continue - # total_reads+=1 - # if total_reads > maxTotalReads: - # return initTable[string,allele_expr]() - #else: + if total_reads > maxTotalReads or total_reads < minTotalReads: + return initTable[string,allele_expr]() return alleleCountTable ## iterate through each selected chromosome @@ -304,7 +316,9 @@ proc sscocaller(argv: var seq[string]): int = var alleleCountTable = countAllele(rec=rec, ibam=ibam, chrom=chrom, mapq=mapq, barcodeTable=barcodeTable, - minbsq=minbsq) + minbsq=minbsq, + maxTotalReads = maxtotal, + minTotalReads = mintotal) if alleleCountTable.len==0: continue var rec_alt:char