Skip to content
Snippets Groups Projects
Commit 1c69f700 authored by Ruqian Lyu's avatar Ruqian Lyu
Browse files

breakdown to modules and refactor

parent 460f8534
No related branches found
No related tags found
No related merge requests found
Pipeline #6980 failed
## implements the traceback function
import utils
from graph import SpermViNodes
import streams
import tables
import math
proc pathTrackBack*(currentSperm: var SpermViNodes,
ithSperm: int,
thetaRef: float,
thetaAlt: float,
cmPmb: float,
outFileVStateMtx: var FileStream,
viSegmentInfo: var FileStream,
posEnd: var int64,
inferProb: var float,
reverseProb: var float): int =
var currentEm,prevEm: array[stateRef..stateAlt, float]
var posStart:int64
var transitFlag = false
var cSNP = 1
var ithSNP: int
var transProb: float
var rightGap,leftGap: array[0..1, float]
for i in 1..high(currentSperm.viNodeseq):
var state = currentSperm.viNodeseq[^i].state
currentSperm.viNodeseq[^(i+1)].state = currentSperm.viNodeseq[^i].pathState[state]
prevEm = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,
cRef=currentSperm.viNodeseq[^(i+1)].cRef,
cAlt=currentSperm.viNodeseq[^(i+1)].cAlt)
ithSNP = currentSperm.spermSnpIndexLookUp[high(currentSperm.viNodeseq)-i+1]
transProb = getTrans(currentSperm.viNodeseq[^(i+1)].pos,
currentSperm.viNodeseq[^(i)].pos,
cmPmb=cmPmb)
if currentSperm.viNodeseq[^(i+1)].state == stateRef:
outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 1")
if state == stateRef:
# not transitioning to a different state ie still in this segment of same state
inferProb+=prevEm[stateRef]
reverseProb+=prevEm[stateAlt]
cSNP += 1
transitFlag = false
#posStart = currentSperm[^(i+1)].pos
else: # there is transition to different state: ref(start) to alt(end) now output the segment info
posStart = currentSperm.viNodeseq[^(i)].pos
#leftGapSize = currentSperm[^(i)].pos - currentSperm[^(i+1)].pos
leftGap=[math.ln(transProb),math.ln(1-transProb)]
inferProb+=leftGap[0]
reverseProb+=leftGap[1]
viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2")
transitFlag = true
cSNP = 1
rightGap = leftGap
inferProb = prevEm[stateRef]+rightGap[0]
reverseProb = prevEm[stateAlt]+rightGap[1]
posEnd = currentSperm.viNodeseq[^(i+1)].pos
else:
outFileVStateMtx.writeLine($ithSNP & " " & $(ithSperm+1) & " 2")
if state == stateAlt:
# not transitioning to a different state ie still in this segment of same state
inferProb += prevEm[stateAlt]
reverseProb += prevEm[stateRef]
cSNP += 1
transitFlag = false
else:
## state transit
posStart = currentSperm.viNodeseq[^(i)].pos
leftGap=[math.ln(transProb),math.ln(1-transProb)]
inferProb += leftGap[0]
reverseProb += leftGap[1]
viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1" )
transitFlag = true
cSNP = 1
rightGap = leftGap
inferProb = prevEm[stateAlt]+rightGap[0]
reverseProb = prevEm[stateRef]+rightGap[1]
posEnd = currentSperm.viNodeseq[^(i+1)].pos
## traced to the start position of the chromosome for this cell
#leftGap = [0.0,0.0]
if not transitFlag:
## the first SNP is included in the segment from traced from back
posStart = currentSperm.viNodeseq[0].pos
if currentSperm.viNodeseq[0].state == stateRef:
viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1" )
else:
viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2" )
else:
## the first node has different state from the second, the first node has its own segment
posStart = currentSperm.viNodeseq[0].pos
posEnd = posStart
cSNP = 1
currentEm = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,
cRef=currentSperm.viNodeseq[0].cRef,
cAlt=currentSperm.viNodeseq[0].cAlt)
transProb = getTrans(currentSperm.viNodeseq[0].pos,
currentSperm.viNodeseq[1].pos,
cmPmb=cmPmb)
if currentSperm.viNodeseq[0].state == stateRef:
inferProb = currentEm[stateRef]+math.ln(transProb)
reverseProb = currentEm[stateAlt]+math.ln(1-transProb)
viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 1" )
else:
inferProb = currentEm[stateAlt]+math.ln(transProb)
reverseProb = currentEm[stateRef]+math.ln(1-transProb)
viSegmentInfo.writeLine("ithSperm" & $ithSperm & " " & $posStart & " " & $posEnd & " " & $(inferProb-reverseProb) & " " & $cSNP & " 2" )
return 0
import tables
import utils
import streams
import hts
import math
type
# Nucleotide = enum
# A, C, T, G
SpermViNodes* = object
viNodeseq*: seq[ViNode]
## look up from SNPIndex to current Sperm SNP index
snpIndexLookUp*: Table[int,int]
## look up from current Sperm SNP index to SNPIndex
spermSnpIndexLookUp*: Table[int,int]
SeqSpermViNodes* = seq[SpermViNodes]
proc addViNode*(barcodeTable: OrderedTableRef,
alleleCountTable: Table[string,allele_expr],
scSpermSeq: var SeqSpermViNodes,
outFileTotalCountMtx: var FileStream,
outFileAltCountMtx: var FileStream,
nnsize: var int,
mindp: int,
maxdp: int,
snpIndex: int,
thetaRef: float,
thetaAlt: float,
rec: Variant,
initProb: array,
cmPmb: float): int =
for bc, ac in alleleCountTable.pairs:
# ac.tostring(acs)
## mindp, maxdp, they are values per cell
if (ac.cref+ac.calt) <= mindp or (ac.cref+ac.calt) >= maxdp: continue
var ithSperm = barcodeTable[bc]
## write to mtx Ref count
outFileTotalCountMtx.writeLine($snpIndex & " " & $(ithSperm+1) & " " & $(ac.cRef+ac.cAlt))
outFileAltCountMtx.writeLine($snpIndex & " " & $(ithSperm+1) & " " & $ac.cAlt)
nnsize += 1
var emissionArray = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,cRef=ac.cRef,cAlt=ac.cAlt)
if scSpermSeq[ithSperm].viNodeseq.len==0:
var currentViNode = ViNode()
currentViNode.pathScore[stateRef] = math.ln(initProb[stateRef])+emissionArray[stateRef]
currentViNode.pathScore[stateAlt] = math.ln(initProb[stateAlt])+emissionArray[stateAlt]
currentViNode.pathState[stateRef] = stateN
currentViNode.pathState[stateAlt] = stateN
currentViNode.state = stateN
currentViNode.pos = int(rec.POS)
currentViNode.cAlt = int(ac.cAlt)
currentViNode.cRef = int(ac.cRef)
scSpermSeq[ithSperm].viNodeseq.add(currentViNode)
scSpermSeq[ithSperm].snpIndexLookUp[snpIndex] = scSpermSeq[ithSperm].viNodeseq.len
scSpermSeq[ithSperm].spermSnpIndexLookUp[scSpermSeq[ithSperm].viNodeseq.len] = snpIndex
else:
let preVNode = scSpermSeq[ithSperm].viNodeseq[high(scSpermSeq[ithSperm].viNodeseq)]
var ltransProb = math.ln(getTrans(preVNode.pos,int(rec.POS),cmPmb=cmPmb))
var lnoTransProb = math.ln(1-getTrans(preVNode.pos,int(rec.POS),cmPmb=cmPmb))
var currentViNode = ViNode()
# ref/alt -> ref
var refTref = preVNode.pathScore[stateRef] + lnoTransProb
var altTref = preVNode.pathScore[stateAlt] + ltransProb
if refTref > altTref:
currentViNode.pathScore[stateRef] = refTref
currentViNode.pathState[stateRef] = stateRef
else:
currentViNode.pathScore[stateRef] = altTref
currentViNode.pathState[stateRef] = stateAlt
# ref/alt -> alt
var refTalt = preVNode.pathScore[stateRef] + ltransProb
var altTalt = preVNode.pathScore[stateAlt] + lnoTransProb
if refTalt > altTalt:
currentViNode.pathScore[stateAlt] = refTalt
currentViNode.pathState[stateAlt] = stateRef
else:
currentViNode.pathScore[stateAlt] = altTalt
currentViNode.pathState[stateAlt] = stateAlt
currentViNode.pathScore[stateAlt] += emissionArray[stateAlt]
currentViNode.pathScore[stateRef] += emissionArray[stateRef]
currentViNode.cAlt = int(ac.cAlt)
currentViNode.cRef = int(ac.cRef)
currentViNode.pos = int(rec.POS)
scSpermSeq[ithSperm].viNodeseq.add(currentViNode)
scSpermSeq[ithSperm].snpIndexLookUp[snpIndex] = scSpermSeq[ithSperm].viNodeseq.len
scSpermSeq[ithSperm].spermSnpIndexLookUp[scSpermSeq[ithSperm].viNodeseq.len] = snpIndex
return 0
# The size line
\ No newline at end of file
from distributions/rmath import dbinom
import math
import tables
import hts
type
allele_expr* = ref object
cref*: int
calt*: int
ViState* = enum
stateRef, stateAlt, stateN
ViNode* = object
pos*: int
cRef*: int
cAlt*: int
pathScore*: array[stateRef..stateAlt, float]
pathState*: array[stateRef..stateAlt, ViState]
state*: ViState
# proc cref*(a:allele_expr):int = a.cref
# proc calt*(a:allele_expr):int = a.calt
proc inc_count_cref*(a:allele_expr) = inc(a.cref)
proc inc_count_calt*(a:allele_expr) = inc(a.calt)
# proc pathScore*(v:ViNode):array[stateRef..stateAlt, float] = v.pathScore
proc getEmission*(thetaRef=0.1,thetaAlt=0.9,
cRef:int,cAlt:int): array[stateRef..stateAlt, float] =
var emissionScore = [dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaRef,log=true),
dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaAlt,log=true)]
return emissionScore
proc getTrans*(pos1:int64,pos2:int64,cmPmb=0.1): float =
if pos2<pos1:
quit "Wrong order of snps"
var rec = 1-math.exp(-float(pos2-pos1)*1e-8*cmPmb) # for autosomes 1*cmPbm cM per Mb
return rec
#
proc countAllele*(rec:Variant, ibam:Bam, maxTotalReads:int,
minTotalReads:int,
chrom:string, mapq: int,
barcodeTable:OrderedTableRef,minbsq:int,bulkBam:bool,barcodeTag:string): Table[string,allele_expr] =
var alleleCountTable = initTable[string,allele_expr]()
var rec_alt:char
var total_reads=0
rec_alt = rec.ALT[0][0]
for aln in ibam.query(chrom = chrom,start = rec.POS.cint-1, stop = rec.POS.cint):
var cbt = tag[string](aln, barcodeTag)
var currentCB: string
if cbt.isNone:
# echo "no cb "
if not bulkBam:
continue
else:
currentCB = "bulk"
else:
currentCB = cbt.get
if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue
## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
## if not aln.flag.proper_pair: continue
if not barcodeTable.hasKey(currentCB):
continue
var
off = aln.start.int
qoff = 0
roff_only = 0
base = 'n'
position = rec.POS.cint
over = 0
for event in aln.cigar:
var cons = event.consumes
#echo $cons
if cons.query:
qoff+=event.len ## the offs to the query sequences
if cons.reference:
off += event.len ## the offs to the reference sequences
if not cons.query:
roff_only += event.len
if off <= position:
continue
over = off - position
# get the base
base = aln.base_at(qoff - over-1)
break
if aln.base_quality_at(qoff - over-1).cint < minbsq:
continue
total_reads+=1
if alleleCountTable.hasKey(currentCB):
if aln.base_at(qoff - over-1) == rec.REF[0]:
alleleCountTable[currentCB].inc_count_cref
continue
if aln.base_at(qoff - over-1) == rec_alt:
alleleCountTable[currentCB].inc_count_calt
continue
else:
var new_snp = allele_expr(cref:0,calt:0)
alleleCountTable[currentCB] = new_snp
if aln.base_at(qoff - over-1) == rec.REF[0]:
alleleCountTable[currentCB].inc_count_cref
continue
if aln.base_at(qoff - over-1) == rec_alt:
alleleCountTable[currentCB].inc_count_calt
continue
if total_reads > maxTotalReads or total_reads < minTotalReads:
return initTable[string,allele_expr]()
return alleleCountTable
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment