Skip to content
Snippets Groups Projects
utils.nim 3.62 KiB

from distributions/rmath import dbinom
import math
import tables
import hts

type 
  allele_expr* = ref object
    cref*: int
    calt*: int
  ViState* = enum
    stateRef, stateAlt, stateN
  ViNode* = object
    pos*: int
    cRef*: int
    cAlt*: int
    pathScore*: array[stateRef..stateAlt, float]
    pathState*: array[stateRef..stateAlt, ViState]      
    state*: ViState  
 
# proc cref*(a:allele_expr):int = a.cref
# proc calt*(a:allele_expr):int = a.calt

proc inc_count_cref*(a:allele_expr) = inc(a.cref)
proc inc_count_calt*(a:allele_expr) = inc(a.calt)
# proc pathScore*(v:ViNode):array[stateRef..stateAlt, float] = v.pathScore

proc getEmission*(thetaRef=0.1,thetaAlt=0.9,
                  cRef:int,cAlt:int): array[stateRef..stateAlt, float] =
  
  var emissionScore = [dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaRef,log=true),
                        dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaAlt,log=true)]
  return emissionScore

proc getTrans*(pos1:int64,pos2:int64,cmPmb=0.1): float = 
  if pos2<pos1:
    quit "Wrong order of snps"
  var rec = 1-math.exp(-float(pos2-pos1)*1e-8*cmPmb) # for autosomes 1*cmPbm cM per Mb 
  return rec
                   #   
proc countAllele*(rec:Variant, ibam:Bam, maxTotalReads:int,
                  minTotalReads:int,
                  chrom:string, mapq: int,
                  barcodeTable:OrderedTableRef,minbsq:int,bulkBam:bool,barcodeTag:string): Table[string,allele_expr] =
  var alleleCountTable = initTable[string,allele_expr]()
  var rec_alt:char
  var total_reads=0
  rec_alt = rec.ALT[0][0]
  for aln in ibam.query(chrom = chrom,start = rec.POS.cint-1, stop = rec.POS.cint):
    var cbt = tag[string](aln, barcodeTag)
    var currentCB: string
    if cbt.isNone:
    #  echo "no cb "
      if not bulkBam:
        continue  
      else:
        currentCB = "bulk"
    else:
      currentCB = cbt.get  
    if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue
    ## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
    ## if not aln.flag.proper_pair: continue
    if not barcodeTable.hasKey(currentCB): 
      continue
    var 
      off = aln.start.int
      qoff = 0
      roff_only = 0
      base = 'n'
      position = rec.POS.cint
      over = 0
    for event in aln.cigar:
      var cons = event.consumes
      #echo $cons
      if cons.query:
        qoff+=event.len ## the offs to the query sequences
      if cons.reference:
        off += event.len ## the offs to the reference sequences
        if not cons.query:
          roff_only += event.len
      if off <= position:
        continue
      over = off - position
      # get the base 
      base = aln.base_at(qoff - over-1)
      break
    if aln.base_quality_at(qoff - over-1).cint < minbsq: 
      continue
    total_reads+=1
    if alleleCountTable.hasKey(currentCB):
      if aln.base_at(qoff - over-1) == rec.REF[0]: 
        alleleCountTable[currentCB].inc_count_cref
        continue
      if aln.base_at(qoff - over-1) == rec_alt: 
        alleleCountTable[currentCB].inc_count_calt
        continue
    else:
      var new_snp = allele_expr(cref:0,calt:0)
      alleleCountTable[currentCB] = new_snp
      if aln.base_at(qoff - over-1) == rec.REF[0]: 
        alleleCountTable[currentCB].inc_count_cref
        continue
      if aln.base_at(qoff - over-1) == rec_alt: 
        alleleCountTable[currentCB].inc_count_calt
        continue
  if total_reads > maxTotalReads or total_reads < minTotalReads:
    return initTable[string,allele_expr]()
  return alleleCountTable