from distributions/rmath import dbinom import math import tables import hts type allele_expr* = ref object cref*: int calt*: int ViState* = enum stateRef, stateAlt, stateN ViNode* = object pos*: int cRef*: int cAlt*: int pathScore*: array[stateRef..stateAlt, float] pathState*: array[stateRef..stateAlt, ViState] state*: ViState # proc cref*(a:allele_expr):int = a.cref # proc calt*(a:allele_expr):int = a.calt proc inc_count_cref*(a:allele_expr) = inc(a.cref) proc inc_count_calt*(a:allele_expr) = inc(a.calt) # proc pathScore*(v:ViNode):array[stateRef..stateAlt, float] = v.pathScore proc getEmission*(thetaRef=0.1,thetaAlt=0.9, cRef:int,cAlt:int): array[stateRef..stateAlt, float] = var emissionScore = [dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaRef,log=true), dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaAlt,log=true)] return emissionScore proc getTrans*(pos1:int64,pos2:int64,cmPmb=0.1): float = if pos2<pos1: quit "Wrong order of snps" var rec = 1-math.exp(-float(pos2-pos1)*1e-8*cmPmb) # for autosomes 1*cmPbm cM per Mb return rec # proc countAllele*(rec:Variant, ibam:Bam, maxTotalReads:int, minTotalReads:int, chrom:string, mapq: int, barcodeTable:OrderedTableRef,minbsq:int,bulkBam:bool,barcodeTag:string): Table[string,allele_expr] = var alleleCountTable = initTable[string,allele_expr]() var rec_alt:char var total_reads=0 rec_alt = rec.ALT[0][0] for aln in ibam.query(chrom = chrom,start = rec.POS.cint-1, stop = rec.POS.cint): var cbt = tag[string](aln, barcodeTag) var currentCB: string if cbt.isNone: # echo "no cb " if not bulkBam: continue else: currentCB = "bulk" else: currentCB = cbt.get if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue ## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary ## if not aln.flag.proper_pair: continue if not barcodeTable.hasKey(currentCB): continue var off = aln.start.int qoff = 0 roff_only = 0 base = 'n' position = rec.POS.cint over = 0 for event in aln.cigar: var cons = event.consumes #echo $cons if cons.query: qoff+=event.len ## the offs to the query sequences if cons.reference: off += event.len ## the offs to the reference sequences if not cons.query: roff_only += event.len if off <= position: continue over = off - position # get the base base = aln.base_at(qoff - over-1) break if aln.base_quality_at(qoff - over-1).cint < minbsq: continue total_reads+=1 if alleleCountTable.hasKey(currentCB): if aln.base_at(qoff - over-1) == rec.REF[0]: alleleCountTable[currentCB].inc_count_cref continue if aln.base_at(qoff - over-1) == rec_alt: alleleCountTable[currentCB].inc_count_calt continue else: var new_snp = allele_expr(cref:0,calt:0) alleleCountTable[currentCB] = new_snp if aln.base_at(qoff - over-1) == rec.REF[0]: alleleCountTable[currentCB].inc_count_cref continue if aln.base_at(qoff - over-1) == rec_alt: alleleCountTable[currentCB].inc_count_calt continue if total_reads > maxTotalReads or total_reads < minTotalReads: return initTable[string,allele_expr]() return alleleCountTable