From d9e0289722aaa96365b77da2b0270aeea1b05bb0 Mon Sep 17 00:00:00 2001 From: rlyu <rlyu@svi.edu.au> Date: Wed, 26 May 2021 16:40:45 +1000 Subject: [PATCH] add bulk support --- src/sscocaller.nim | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/sscocaller.nim b/src/sscocaller.nim index 50201b1..9471c6b 100755 --- a/src/sscocaller.nim +++ b/src/sscocaller.nim @@ -21,9 +21,6 @@ type cref: int calt: int type - AlleleExpr = object - cRef: int - cAlt: int # Nucleotide = enum # A, C, T, G ViState = enum @@ -42,16 +39,9 @@ type ## look up from current Sperm SNP index to SNPIndex spermSnpIndexLookUp: Table[int,int] SeqSpermViNodes = seq[SpermViNodes] -#var alexprSeq:seq[AlleleExpr] proc inc_count_cref(a:allele_expr) = inc(a.cref) proc inc_count_calt(a:allele_expr) = inc(a.calt) -proc reset_count(a:allele_expr) = - a.calt = 0 - a.cref = 0 -proc tostring(a:allele_expr, s:var string) {.inline.} = - s.set_len(0) - s.add("\t" & $a.cref & "," & $a.calt) proc getEmission(thetaRef=0.1,thetaAlt=0.9, cRef:int,cAlt:int): array[stateRef..stateAlt, float] = @@ -69,7 +59,7 @@ proc getTrans(pos1:int64,pos2:int64,cmPmb=0.1): float = proc countAllele(rec:Variant, ibam:Bam, maxTotalReads:int, minTotalReads:int, chrom:string, mapq: int, - barcodeTable:OrderedTableRef,minbsq:int): Table[string,allele_expr] = + barcodeTable:OrderedTableRef,minbsq:int,bulkBam:bool): Table[string,allele_expr] = var alleleCountTable = initTable[string,allele_expr]() var rec_alt:char var total_reads=0 @@ -156,6 +146,7 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string, kstr.m = 0 kstr.s = nil var ithSperm=0 + var bulkBam = false ## initiate the table with CB as has keys, allele counts (ref object) as elements while hts_getline(hf, cint(10), addr kstr) > 0: if $kstr.s[0] == "#": @@ -163,12 +154,14 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string, var v = $kstr.s discard barcodeTable.hasKeyOrPut(v, ithSperm) ithSperm+=1 + if barcodeTable.len == 1 and barcodeTable.hasKey("bulk"): + echo "running in bulk mode and all reads are regarded as from one sample/cell" + bulkBam = true let initProb:array[stateRef..stateAlt, float]=[0.5,0.5] ## iterate through each selected chromosome for chrom in s_Chrs: # var scTable = initTable[string,SingleSpermRec]() var snpIndex = 0 - var spermIndex = 0 ## number of non zeros var nnsize = 0 var scSpermSeq:SeqSpermViNodes @@ -208,7 +201,8 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string, barcodeTable=barcodeTable, minbsq=minbsq, maxTotalReads = maxtotal, - minTotalReads = mintotal) + minTotalReads = mintotal, + bulkBam = bulkBam) if alleleCountTable.len==0: continue var rec_alt:char @@ -410,7 +404,7 @@ proc sscocaller(threads:int, vcff:string, barcodeFile:string, return 0 when(isMainModule): - let version = "0.1.0" + let version = "0.2.0" var doc = format(""" $version @@ -444,7 +438,7 @@ Options: -h --help show help Examples - ./sscocaller --threads 10 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp- + ./sscocaller --threads 10 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp """ % ["version", version]) let args = docopt(doc, version=version) -- GitLab