diff --git a/private/blocks_utils.nim b/private/blocks_utils.nim
index 6c3dbc164dc7c4b09ca96e202f8c9ceaae58c8de..8e76e43e6c4b32bfd509ea8aaf8ff4069980caec 100755
--- a/private/blocks_utils.nim
+++ b/private/blocks_utils.nim
@@ -39,7 +39,7 @@ type Block_linkage* = ref object
   linkageType_00* :int
   linkageType_01*: int
   switch_posterior_prob*: float
-  switch_confidence*: float
+  linkage_confidence*: float
   switch*: int
 
 
@@ -53,6 +53,7 @@ proc cal_switch_posterior*(error_rate = 0.1, n_blocks:int, n_sw:int): float =
   var ll_01 = error_rate^(n_blocks-n_sw)*(1-error_rate)^n_sw
   var ll_00 = error_rate^(n_sw)*(1-error_rate)^(n_blocks-n_sw)
   return ll_01/(ll_01+ll_00)
+
 proc is_block_header(line_string:string): bool = 
   if line_string.splitWhitespace()[0] == "BLOCK:":
   # .len == 11:
diff --git a/private/cal_switch_score.nim b/private/cal_switch_score.nim
new file mode 100755
index 0000000000000000000000000000000000000000..e86b69e6d3ab59cc2245dc3dbd2be76c47f56b89
--- /dev/null
+++ b/private/cal_switch_score.nim
@@ -0,0 +1,9 @@
+## calcualte switch score for snps positions that are high risk
+
+let binSize = 1000
+let movingStep = 200
+
+# reduce the search space
+# return seq of snp indexes
+
+proc findHighRiskSnps(fullGeno:seq[int]): seq[int] = 
\ No newline at end of file
diff --git a/private/find_template_cell b/private/find_template_cell
new file mode 100755
index 0000000000000000000000000000000000000000..2349d215e9302d9bb859f146e3f73e55201b74e4
Binary files /dev/null and b/private/find_template_cell differ
diff --git a/private/find_template_cell.nim b/private/find_template_cell.nim
new file mode 100755
index 0000000000000000000000000000000000000000..41478f3105b4affa3176f83f60e060f13b43f832
--- /dev/null
+++ b/private/find_template_cell.nim
@@ -0,0 +1,137 @@
+# find template gamete that serves as a initial haplotype for this individual
+
+import streams
+import sequtils
+import strutils
+import math
+import algorithm
+import utils
+
+type 
+  CellPairs = object
+    cell1*: int
+    cell2*: int
+    coexistSnps*: int
+    dissimilarity*: float
+  
+let
+  minCoexit = 1000
+  maxDissim = 0.0099
+
+var nsnps,ncells,totalEntries:int
+
+proc toBinaryGeno(x: int) : BinaryGeno = 
+  if x == 1: return gREF
+  if x == 2: return gALT
+  quit "mtx geno is not 1 or 2"
+
+proc readGtMtx*(mtxFileStream:FileStream, gtMtx:var seq[seq[BinaryGeno]]):int = 
+  var currentEntrySeq:seq[int]
+  var currentLine:seq[string]
+
+  while not mtxFileStream.atEnd():
+    currentLine = mtxFileStream.readLine().splitWhitespace()
+    ## i j 1-based from file
+    currentEntrySeq = map(currentLine, proc(x: string): int = parseInt(x))
+    gtMtx[(currentEntrySeq[1]-1)][(currentEntrySeq[0]-1)] = currentEntrySeq[2].toBinaryGeno
+  return 0
+
+proc countCoexistMatch(seq1: seq[BinaryGeno], seq2: seq[BinaryGeno]): seq[int] = 
+  if not (seq1.len == seq2.len):
+    quit "cells' genotypes under comparison do not have the same length"
+  var ncoexist, nmatch, cell1Snps, cell2Snps:int
+  for i, g in seq1:
+    if g != gUnknown: cell1Snps += 1
+    if (seq2[i] != gUnknown): cell2Snps += 1
+    if (g != gUnknown and seq2[i]!= gUnknown):
+      ncoexist += 1
+      if(g == seq2[i]):
+        nmatch += 1
+  return [cell1Snps,cell2Snps,nmatch,ncoexist].toSeq
+
+proc findCellPairs(gtMtx:seq[seq[BinaryGeno]],nPairs = 3, nSnpsPercell: var seq[int]): seq[CellPairs] = 
+  let ncells = gtMtx.len
+  var genoMatch = @[0,0,0,0]
+  var returnCp = newSeq[CellPairs](nPairs)
+  var k = 0
+  for celli in 0..(ncells-2):
+    if k == nPairs: break
+    for cellj in (celli+1)..(ncells-1):
+      genoMatch = countCoexistMatch(seq1 = gtMtx[celli], seq2 = gtMtx[cellj])
+      nSnpsPercell[celli] = genoMatch[0]
+      nSnpsPercell[cellj] = genoMatch[1]     
+      if genoMatch[3] >= minCoexit:
+        if (genoMatch[2]/genoMatch[3] < maxDissim) or (genoMatch[2]/genoMatch[3] > (1 - maxDissim)):
+          returnCp[k] = CellPairs(cell1: celli, cell2: cellj, coexistSnps:genoMatch[3], dissimilarity:(genoMatch[2]/genoMatch[3]))
+          k += 1
+      if k == nPairs: break
+  returnCp
+
+
+proc findCellBynSNPs*(nSnpsPercell:seq[int],q = 0.75): int = 
+  # return the selected cells' index
+  # not too many SNPs or too few
+  var chosed:int
+  var seqNsnps = nSnpsPercell
+  sort(seqNsnps, system.cmp[int])
+  int(floor(float(seqNsnps.len) * q))
+
+proc selectTemplateCell*(gtMtx:seq[seq[BinaryGeno]], nPairs = 3): int = 
+  var nSnpsCells = newSeq[int](gtMtx.len)
+  var cellPairs = findCellPairs(gtMtx = gtMtx, nPairs = nPairs, nSnpsPercell = nSnpsCells)
+  echo "cellPairs"
+  echo cellPairs
+  var selectedCell, icp: int
+  if cellPairs[0].coexistSnps == 0 :
+    # no good pairs found
+    selectedCell = findCellBynSNPs(nSnpsPercell = nSnpsCells)
+  else:
+    let coExistSnps = map(cellPairs, proc(x: CellPairs) : int = x.coexistSnps)
+    icp = maxIndex(coExistSnps)
+  if nSnpsCells[cellPairs[icp].cell1] > nSnpsCells[cellPairs[icp].cell2]:
+    echo "selected cell: " & $cellPairs[icp].cell1 & " nSnps: " & $nSnpsCells[cellPairs[icp].cell1]
+    return cellPairs[icp].cell1
+  else:
+    echo "selected cell: " & $cellPairs[icp].cell2 & " nSnps: " & $nSnpsCells[cellPairs[icp].cell2]
+    return cellPairs[icp].cell2
+
+var gtMtx:seq[seq[BinaryGeno]]  
+var currentEntry:seq[int]
+var currentEntrySeq:seq[string]
+var gtMtxFileStream:FileStream
+## i, j are 1-based
+let mtxFile = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/WC_CNV_chr1_nonzero.mtx"
+echo mtxFile
+
+try: 
+  gtMtxFileStream = openFileStream(mtx_file, fmRead)
+except:
+  stderr.write getCurrentExceptionMsg()
+# %%MatrixMarket matrix coordinate integer general
+discard gtMtxFileStream.readLine()
+#N, i,j
+currentEntrySeq = gtMtxFileStream.readLine().splitWhitespace()
+currentEntry = map(currentEntrySeq, proc(x: string): int = parseInt(x))
+nsnps = currentEntry[0]
+echo "nsnps " & $nsnps
+ncells = currentEntry[1]
+echo "ncells " & $ncells
+
+totalEntries = currentEntry[2]
+
+echo "totalEntries " & $totalEntries
+
+
+gtMtx = newSeqWith(ncells,newSeq[BinaryGeno](nsnps))
+
+discard readGtMtx(gtMtxFileStream,gtMtx)
+#var colIndex = 4
+#echo gtMtx[0][27..40]
+
+#echo sliceColumn(gtMtx,56)
+#echo gtMtx.len
+
+#echo findCellPairs(gtMtx = gtMtx)
+#var nSnpsCells = newSeqWith(ncells,0)
+#selectTemplateCell
+echo selectTemplateCell(gtMtx = gtMtx, nPairs =3)
\ No newline at end of file
diff --git a/private/findpath.nim b/private/findpath.nim
index 7498a84a991e7bf2d3db0e0afa166651d421e74b..902ffe50c409b27ad500b888b032e9db69c583f8 100755
--- a/private/findpath.nim
+++ b/private/findpath.nim
@@ -48,7 +48,7 @@ proc pathTrackBack*(currentSperm: var SpermViNodes,
         leftGap=[math.ln(transProb),math.ln(1-transProb)]
         inferProb+=leftGap[0]
         reverseProb+=leftGap[1]
-        viSegmentInfo.writeLine(join(["ithSperm", $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "2"],sep = " "))
+        viSegmentInfo.writeLine(join(["ithSperm" & $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "2"],sep = " "))
         transitFlag = true
         cSNP = 1
         rightGap = leftGap
@@ -69,7 +69,7 @@ proc pathTrackBack*(currentSperm: var SpermViNodes,
         leftGap=[math.ln(transProb),math.ln(1-transProb)]
         inferProb += leftGap[0]
         reverseProb += leftGap[1]
-        viSegmentInfo.writeLine(join(["ithSperm", $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "1"],sep = " "))
+        viSegmentInfo.writeLine(join(["ithSperm" & $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "1"],sep = " "))
         transitFlag = true
         cSNP = 1
         rightGap = leftGap
@@ -82,9 +82,9 @@ proc pathTrackBack*(currentSperm: var SpermViNodes,
     ## the first SNP is included in the segment from traced from back
     posStart = currentSperm.viNodeseq[0].pos
     if currentSperm.viNodeseq[0].state == stateRef:
-      viSegmentInfo.writeLine(join(["ithSperm", $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "1"],sep = " "))
+      viSegmentInfo.writeLine(join(["ithSperm" & $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "1"],sep = " "))
     else:
-      viSegmentInfo.writeLine(join(["ithSperm", $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "2"],sep = " "))
+      viSegmentInfo.writeLine(join(["ithSperm" & $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "2"],sep = " "))
   else:
     ## the first node has different state from the second, the first node has its own segment
     posStart = currentSperm.viNodeseq[0].pos
@@ -95,10 +95,10 @@ proc pathTrackBack*(currentSperm: var SpermViNodes,
     if currentSperm.viNodeseq[0].state == stateRef:
       inferProb = currentEm[stateRef]+math.ln(transProb)
       reverseProb = currentEm[stateAlt]+math.ln(1-transProb)
-      viSegmentInfo.writeLine(join(["ithSperm", $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "1"],sep = " "))
+      viSegmentInfo.writeLine(join(["ithSperm" & $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "1"],sep = " "))
     else:
       inferProb = currentEm[stateAlt]+math.ln(transProb)
       reverseProb = currentEm[stateRef]+math.ln(1-transProb)
-      viSegmentInfo.writeLine(join(["ithSperm", $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "2"],sep = " ") )
+      viSegmentInfo.writeLine(join(["ithSperm" & $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "2"],sep = " ") )
   
   return 0
diff --git a/private/fragments.nim b/private/fragments.nim
index 7eac2de7339221de0f06166ba72033ebf76e946d..1e250bc9261f4521c313f7205eabe00ac164eeea 100755
--- a/private/fragments.nim
+++ b/private/fragments.nim
@@ -3,12 +3,12 @@ import strutils
 import hts
 import utils
 import streams
-# minSnpDepth, this SNP should at least be covered by two cells
 
+# minSnpDepth = 2, this SNP should at least be covered by two cells
 proc findGtNodes*(rec:Variant, variantIndex: int, ibam:Bam, maxTotalReads:int, minTotalReads:int, mapq: int,
                   barcodeTable:TableRef,
                   minbsq:int,
-                  barcodeTag:string, minCellDp:int, minSnpDepth:int ): Table[string,GtNode] =
+                  barcodeTag:string ): Table[string,GtNode] =
 
   var barcodedNodesTable = initTable[string,GtNode]()
   var rec_alt:char
@@ -54,34 +54,37 @@ proc findGtNodes*(rec:Variant, variantIndex: int, ibam:Bam, maxTotalReads:int, m
         continue
   if total_reads > maxTotalReads or total_reads < minTotalReads:
     return initTable[string,GtNode]()
+  return barcodedNodesTable  
+
+proc callNodeGenotype*(bcdTable: var Table[string, GtNode], minCellDp: int, minSnpDepth:int ,p0 = 0.3, p1 = 0.8): Table[string, GtNode] = 
   var calt = 0 
   var cellDP = 0
   var delBarcodes:seq[string]
 
-  for cellbarcode in barcodedNodesTable.keys:
-    cellDP = barcodedNodesTable[cellbarcode].alleles.len
+  for cellbarcode in bcdTable.keys:
+    cellDP = bcdTable[cellbarcode].alleles.len
     if cellDP < minCellDp:
       delBarcodes.add(cellbarcode)
       continue
-    calt = barcodedNodesTable[cellbarcode].alleles.count("1")
-    if calt/cellDP < 0.3:
-      barcodedNodesTable[cellbarcode].genotype = 0
-    elif calt/cellDP > 0.8:
-      barcodedNodesTable[cellbarcode].genotype = 1
+    calt = bcdTable[cellbarcode].alleles.count("1")
+    if calt/cellDP < p0:
+      bcdTable[cellbarcode].genotype = gREF
+    elif calt/cellDP > p1:
+      bcdTable[cellbarcode].genotype = gALT
     else:
       delBarcodes.add(cellbarcode)
       continue
   for bc in delBarcodes:
-    barcodedNodesTable.del(bc)
-  if barcodedNodesTable.len < minSnpDepth:
+    bcdTable.del(bc)
+  if bcdTable.len < minSnpDepth:
     return initTable[string,GtNode]() 
-  return barcodedNodesTable  
+  return bcdTable
 
 ## write the .fmf output stream
 proc writeToFMF*(twoNodesFrag: Fragment, fmf_out:FileStream): Fragment =
   if twoNodesFrag.node2.variantIndex - twoNodesFrag.node1.variantIndex == 1:
  #   echo "writting for cell " & twoNodesFrag.cellbarcode & " with 1 block"
-    if $twoNodesFrag.node1.genotype == "" or $twoNodesFrag.node2.genotype == "":
+    if twoNodesFrag.node1.genotype == gUnknown or twoNodesFrag.node2.genotype == gUnknown:
       quit "Genotype unknown when writing out"
     fmf_out.writeLine(join(["1",twoNodesFrag.cellbarcode & "." & $twoNodesFrag.counter, $twoNodesFrag.node1.variantIndex, 
                             $twoNodesFrag.node1.genotype & $twoNodesFrag.node2.genotype,"~~"], sep = " "))
@@ -98,18 +101,8 @@ proc writeToFMF*(twoNodesFrag: Fragment, fmf_out:FileStream): Fragment =
 proc updateCellFragment*(barcodedNodesTable:Table[string,GtNode],
                          cellFragmentsTable:TableRef[string,Fragment],
                          fmf_out:FileStream): TableRef[string,Fragment] = 
-
  # echo "updating for " & $barcodedNodesTable.len
   for cellbarcode in barcodedNodesTable.keys:
-    # cellDP = barcodedNodesTable[cellbarcode].alleles.len
-    # if cellDP < minCellDp: continue
-    # calt = barcodedNodesTable[cellbarcode].alleles.count("1")
-    # if calt/cellDP < 0.3:
-    #   barcodedNodesTable[cellbarcode].genotype = 0
-    # elif calt/cellDP > 0.8:
-    #   barcodedNodesTable[cellbarcode].genotype = 1
-    # else:
-    #   continue
     if cellFragmentsTable.hasKey(cellbarcode):
       if (not cellFragmentsTable[cellbarcode].node2.isNil) or cellFragmentsTable[cellbarcode].node1.isNil :
         quit "Writing fmf went wrong, the second allele shoud be moved to first after getting one fragment"
diff --git a/private/genotype_mtx b/private/genotype_mtx
new file mode 100755
index 0000000000000000000000000000000000000000..c535f52659c7803e1eb943eef8f990666301d1b4
Binary files /dev/null and b/private/genotype_mtx differ
diff --git a/private/genotype_mtx.nim b/private/genotype_mtx.nim
new file mode 100755
index 0000000000000000000000000000000000000000..66feb5819811ea34cbe1d6bd61512cc5f893315f
--- /dev/null
+++ b/private/genotype_mtx.nim
@@ -0,0 +1,138 @@
+## generate gtMtx, gtMtx_snpAnnot, totalCount.mtx altCount.mtx
+
+## author Ruqian Lyu
+## Date: 2021 Oct
+## gtMtx in 1, or 2
+import tables
+import strutils
+import hts
+import utils
+import streams
+
+import fragments
+
+## these outputs will be per chromosome
+
+
+proc getGtMtx(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outGtMtx:FileStream, outSnpAnnot:FileStream, 
+              outTotalCountMtx:FileStream, outAltCountMtx:FileStream,maxTotalReads:int,
+              minTotalReads:int,  mapq: int,minbsq:int, barcodeTag:string, minsnpdepth:int,minCellDp:int, maxCellDp:int,p0 = 0.3, p1 = 0.8):int = 
+  var variantIndex = 1
+  var writtenVarintIndex = 1
+  var calt,cellDP,geno,cellIndex,nnsize = 0
+  var delBarcodes:seq[string]
+  var cellGtNodes: Table[string, GtNode]
+  var cellGtNodeCopy: Table[string, GtNode]
+  for rec in ivcf.query("*"): 
+    var rec_alt = rec.ALT[0][0]
+    var rec_ref = rec.REF[0]
+    cellGtNodes = findGtNodes(rec=rec, variantIndex = variantIndex, ibam=ibam, maxTotalReads=maxTotalReads,
+                                  minTotalReads=minTotalReads, mapq = mapq, barcodeTable = barcodeTable,
+                                  minbsq=minbsq,barcodeTag=barcodeTag)
+    cellGtNodeCopy = cellGtNodes
+    for cellbarcode in cellGtNodeCopy.keys:
+      cellDP = cellGtNodes[cellbarcode].alleles.len
+      if cellDP < minCellDp:
+        cellGtNodes.del(cellbarcode)
+        continue
+      calt = cellGtNodes[cellbarcode].alleles.count("1")
+      if calt/cellDP < p0:
+        cellGtNodes[cellbarcode].genotype = gREF
+      elif calt/cellDP > p1:
+        cellGtNodes[cellbarcode].genotype = gALT
+      else:
+        cellGtNodes.del(cellbarcode)
+        continue
+    if cellGtNodes.len < minSnpDepth:
+      variantIndex.inc
+      continue
+    else:
+      for cellbarcode in cellGtNodes.keys:
+        cellDP = cellGtNodes[cellbarcode].alleles.len
+        calt = cellGtNodes[cellbarcode].alleles.count("1")
+        ## $cellGtNodes[cellbarcode].genotype BinaryGeno to "0", "1" or "-1" (not -1), write to file coding in 1 or 2
+        geno = parseInt($cellGtNodes[cellbarcode].genotype) + 1
+        # index from 1 for matrices
+        cellIndex = barcodeTable[cellbarcode] + 1
+        outGtMtx.writeLine(join([$writtenVarintIndex, $cellIndex, $geno],sep = " "))
+        outTotalCountMtx.writeLine(join([$writtenVarintIndex, $cellIndex, $cellDP],sep = " "))
+        outAltCountMtx.writeLine(join([$writtenVarintIndex, $cellIndex, $calt],sep = " "))
+        outSnpAnnot.writeLine(join([$rec.POS, $rec_ref, $rec_alt], sep="\t") )
+        nnsize.inc
+      writtenVarintIndex.inc
+    ## write to matrices
+          
+  echo "processed " & $(variantIndex) & " variants"
+  echo "wrote " & $(writtenVarintIndex) & " variants to matrices and snpAnnot file"
+  for outFileStream in [outGtMtx,outTotalCountMtx,outAltCountMtx]:
+    outFileStream.setPosition(49)
+    outFileStream.write($(writtenVarintIndex) & " " & $barcodeTable.len & " " & $nnsize) 
+  for outFileStream in[outGtMtx,outTotalCountMtx,outAltCountMtx, outSnpAnnot]:
+    outFileStream.close()
+  return 0
+
+let vcf_file = "data/swapped/FVB_NJ.mgp.v5.snps.dbSNP142.homo.alt.modified.swapped.GT.chr1.vcf.gz"
+let bam_file = "data/WC_CNV_42/WC_CNV_42.bam"
+let barcode_file = "data/WC_CNV_42/WC_CNV_42_filteredBC.tsv"
+var
+  ibam:Bam
+  ivcf:VCF
+  outGtMtx, outSnpAnnot, outTotalCountMtx, outAltCountMtx:FileStream
+if not open(ibam, bam_file, threads=1, index = true):
+    quit "couldn't open input bam"
+if not open(ivcf, vcf_file, threads=1):
+    quit "couldn't open: vcf file"
+
+var hf = hts.hts_open(cstring(barcode_file), "r")
+#### TODO : Table size
+var barcodeTable =  newTable[string,int](initialSize = 1024)
+var kstr: hts.kstring_t
+kstr.l = 0
+kstr.m = 0
+kstr.s = nil
+var ithSperm = 0
+## initiate the table with CB as keys, allele counts (ref object) as elements
+while hts_getline(hf, cint(10), addr kstr) > 0:
+  if $kstr.s[0] == "#":
+    continue
+  var v = $kstr.s
+  discard barcodeTable.hasKeyOrPut(v, ithSperm)
+  ithSperm.inc
+discard hf.hts_close()
+
+let s_Chrs = @["chr1"]
+let out_prefix = "test_data/getMtx/"
+echo "generating gtMtx"
+try: 
+  outGtMtx = openFileStream(out_prefix & "_gtMtx.mtx", fmWrite)
+  outSnpAnnot = openFileStream(out_prefix & "_snpAnnot.txt", fmWrite)
+  outTotalCountMtx = openFileStream(out_prefix & "_totalMtx.mtx", fmWrite)
+  outAltCountMtx = openFileStream(out_prefix & "_AltMtx.mtx", fmWrite)
+except:
+  stderr.write getCurrentExceptionMsg()
+
+## write headers to those mtx files and the first line place holder for total_row total_column total_entry
+let sparseMatrixHeader = "%%MatrixMarket matrix coordinate integer general"
+
+for outFileStream in [outGtMtx,outTotalCountMtx,outAltCountMtx]:
+  outFileStream.writeLine(sparseMatrixHeader)
+  outFileStream.writeLine(' '.repeat(50))
+
+outSnpAnnot.writeLine(join(["POS","REF", "ALT"], sep = "\t"))
+
+discard getGtMtx(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outGtMtx = outGtMtx, outTotalCountMtx = outTotalCountMtx ,outAltCountMtx = outAltCountMtx,
+                 outSnpAnnot = outSnpAnnot, maxTotalReads = 30,
+                 minTotalReads = 5, mapq = 20, minbsq = 13, minCellDp = 2,maxCellDp =10,barcodeTag = "CB",minsnpdepth=1)
+
+
+var outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile : string
+
+## sort entries in mtx files 
+for chrom in s_Chrs:
+  outGtMtxFile = out_prefix & chrom & "_gtMtx.mtx"
+  outTotalCountMtxFile = out_prefix & chrom &  "_totalMtx.mtx"
+  outAltCountMtxFile = out_prefix & chrom &  "_AltMtx.mtx"
+  for mtxFile in [outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile]:
+    var imtx = readMtx(mtx_file = mtxFile)
+    discard sortWriteMtx(imtx, mtx_file = mtxFile)
+
diff --git a/private/infer_missing_snps b/private/infer_missing_snps
new file mode 100755
index 0000000000000000000000000000000000000000..85df612e74316fd0810c1ef6ddc70cf358b34890
Binary files /dev/null and b/private/infer_missing_snps differ
diff --git a/private/infer_missing_snps.nim b/private/infer_missing_snps.nim
new file mode 100755
index 0000000000000000000000000000000000000000..ab4174623d6846b85cd8999c16bc2f1e4346e457
--- /dev/null
+++ b/private/infer_missing_snps.nim
@@ -0,0 +1,130 @@
+## infer missing snp in template cell by looking at the snp's link to other nearby snps in other cells
+
+## author Ruqian Lyu
+## rlyu@svi.edu.au
+import find_template_cell
+import sequtils
+import math
+import streams
+import strutils
+import utils
+
+let nAnchorSnps = 10
+proc sliceColumn*(gtMtx:seq[seq[BinaryGeno]],colIndex:int): seq[BinaryGeno] = 
+  map(gtMtx, proc(x:seq[BinaryGeno]):BinaryGeno = x[colIndex])
+
+# cell_geno seq of length nAnchorSnps
+# temp_geno seq of length nAnchorSnps
+# return whether cell_geno matches with temp_geno in hap0 or hap1(bitwise complementary to temp_geno)
+proc matchTemplateHap(cell_geno:seq[BinaryGeno], temp_geno:seq[BinaryGeno],error_rate = 0.1): seq[float] = 
+  var nmatch = 0
+  for i,g in cell_geno:
+    if g == temp_geno[i]:
+      nmatch += 1
+  let mis = cell_geno.len - nmatch
+  let ll_h0 = error_rate^mis*(1-error_rate)^(nmatch)
+  let ll_h1 = error_rate^nmatch*(1-error_rate)^(mis)
+  if ll_h0 > ll_h1:
+    return @[0.0, ll_h0/(ll_h0 + ll_h1)]
+  elif ll_h0 == ll_h1:
+    return @[-1.0, 0.5]
+  else:
+    return @[1.0, ll_h1/(ll_h0 + ll_h1)]
+
+proc inferGeno(type00:int, type10:int, error_rate = 0.1,posterior_thresh = 0.98): BinaryGeno = 
+  let prob_0 = error_rate^type10*(1-error_rate)^(type00)
+  let prob_1 = error_rate^type00*(1-error_rate)^(type10)
+  let prob_0_p = prob_0/(prob_0 + prob_1)
+  let prob_1_p = 1 - prob_0_p 
+  if max(prob_0_p,prob_1_p) > posterior_thresh:
+    if prob_0_p > prob_1_p:
+      return gREF
+    else:
+      return gALT
+  return gUnknown
+# return full sequence of template geno by inferring missing SNPs's genotypes 
+proc inferSnpGeno(templateGeno: seq[BinaryGeno], gtMtx:seq[seq[BinaryGeno]], posterior_thresh = 0.99):seq[BinaryGeno] = 
+  var fullGeno = templateGeno
+  let nCells = gtMtx.len
+  let nSnps = gtMtx[0].len
+  var coexisPos:seq[int]
+  var snpLD: seq[float]
+  var type00, type10: int
+
+  var snpiInCells = newSeq[BinaryGeno](gtMtx.len)
+  var offset = 1
+  for i,missingSnpi in templateGeno:
+    if missingSnpi != gUnknown: continue
+    snpiInCells = sliceColumn(gtMtx,i)
+    type00 = 0
+    type10 = 0
+    for j,snpiGeno in snpiInCells:
+      if snpiGeno == gUnknown: continue
+      # find 10 coexisting positions for cell j with template geno seq
+      offset = 1
+      coexisPos = newSeq[int]()
+      while(coexisPos.len < nAnchorSnps):
+        if ((i+offset) < nSnps):
+          if((gtMtx[j][i+offset] != gUnknown) and (fullGeno[i+offset] != gUnknown)):
+            coexisPos.add(i+offset)
+        if (i-offset) >= 0:
+          if((gtMtx[j][i-offset] != gUnknown) and (fullGeno[i-offset] != gUnknown)):
+            coexisPos.add(i-offset)
+        offset += 1
+      snpLD = matchTemplateHap(cell_geno = map(coexisPos,proc(x:int): BinaryGeno =  gtMtx[j][x]), temp_geno = map(coexisPos,proc(x:int): BinaryGeno =  fullGeno[x]) )
+      if snpLD[1] > posterior_thresh:
+        if snpLD[0] == 1.0:
+          if snpiGeno == gREF: ## snpiGeno is 1 or 2 
+            type10 += 1 
+          else:
+            type00 += 1
+        else:
+          if snpiGeno == gREF: 
+            type00 += 1 
+          else:
+            type10 += 1
+      else: continue
+    # echo "type00 " & $type00
+    # echo "type10 " & $type10
+    fullGeno[i] = inferGeno(type00, type10)  
+    if i > 20:
+      break
+  return fullGeno
+
+
+
+var gtMtx:seq[seq[BinaryGeno]]  
+var currentEntry:seq[int]
+var currentEntrySeq:seq[string]
+var gtMtxFileStream:FileStream
+## i, j are 1-based
+let mtxFile = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/WC_CNV_chr1_nonzero.mtx"
+echo mtxFile
+
+try: 
+  gtMtxFileStream = openFileStream(mtx_file, fmRead)
+except:
+  stderr.write getCurrentExceptionMsg()
+# %%MatrixMarket matrix coordinate integer general
+discard gtMtxFileStream.readLine()
+#N, i,j
+currentEntrySeq = gtMtxFileStream.readLine().splitWhitespace()
+currentEntry = map(currentEntrySeq, proc(x: string): int = parseInt(x))
+var nsnps = currentEntry[0]
+echo "nsnps " & $nsnps
+var ncells = currentEntry[1]
+echo "ncells " & $ncells
+
+var totalEntries = currentEntry[2]
+
+echo "totalEntries " & $totalEntries
+
+
+gtMtx = newSeqWith(ncells,newSeq[BinaryGeno](nsnps))
+
+discard readGtMtx(gtMtxFileStream,gtMtx)
+
+var tempcell = selectTemplateCell(gtMtx = gtMtx, nPairs =3)
+var tempcellGeno = gtMtx[tempcell]
+let fullGeno = inferSnpGeno(tempcellGeno, gtMtx)
+echo fullGeno.len
diff --git a/private/infer_snp_hap.nim b/private/infer_snp_hap.nim
new file mode 100755
index 0000000000000000000000000000000000000000..0ef2896c2afd9aa5c835e9db4ead71550a642be5
--- /dev/null
+++ b/private/infer_snp_hap.nim
@@ -0,0 +1,5 @@
+# infer SNP hap, give the current hap
+
+var hapVCFFile = ""
+var inputVCFFile:
+var outVCFFile:
\ No newline at end of file
diff --git a/private/phase_blocks.nim b/private/phase_blocks.nim
index 9f2b2db9f74207839c30609bdde79874fe5e1c0b..8fdbbb424d46bde3e2194a0f432cd30b5f4d10d9 100755
--- a/private/phase_blocks.nim
+++ b/private/phase_blocks.nim
@@ -7,10 +7,10 @@ import math
 import sequtils
 import hts
 
-let n_anchor_snps = 9
-let posterior_threshold = 0.99
+let n_anchor_snps = 19
+let posterior_threshold = 0.9999
 let confidence_threshold = 0.95
-let debug = false
+let debug = true
 
 proc match_phase(cell_geno:seq[int], block_h0:seq[int]): int =
   if cell_geno.len != block_h0.len:
@@ -75,6 +75,7 @@ proc find_contig_blocks*(blocks:seq[Block],cellBlockLeftPhaseTable: TableRef[str
   var contig_blocks = newSeq[int]()
   var count_missing =  newSeqWith[blocks.len,0]
   var contig_block_allow_missing = int(ceil(float(cellBlockLeftPhaseTable.len) * 0.1))
+  if debug: echo "contig_block_allow_missing: " & $contig_block_allow_missing
   # for b in 0..high(blocks):
   var left_right_phase: seq[tuple[left:int,right:int]]
   for bc in cellBlockLeftPhaseTable.keys():
@@ -86,12 +87,13 @@ proc find_contig_blocks*(blocks:seq[Block],cellBlockLeftPhaseTable: TableRef[str
   if debug: echo "block missing in X number of cells:" & $count_missing
   for i, c in count_missing:
     if c <= contig_block_allow_missing: contig_blocks.add(i)
+  if contig_blocks.len == 0 :
+    contig_blocks.add(minIndex(count_missing))
   return contig_blocks
 
 ## find linkage of two blocks
-## 
 proc find_linkage*(block1: int, block2: int, cellBlockLeftPhaseTable: TableRef[string, seq[int]],cellBlockRightPhaseTable: TableRef[string, seq[int]]): Block_linkage= 
-  if debug: echo "linking " & $block1 & "and" & $block2
+  if debug: echo "linking " & $block1 & " and " & $block2
   var left_right_phase: seq[tuple[left:int,right:int]]
   var linkString: string
   if not (block1 < block2):
@@ -106,7 +108,8 @@ proc find_linkage*(block1: int, block2: int, cellBlockLeftPhaseTable: TableRef[s
     elif linkString in @["01","10"]:
       blockLinkage.linkageType_01.inc
   blockLinkage.switch_posterior_prob = cal_switch_posterior(error_rate = 0.1, (blockLinkage.linkageType_00 + blockLinkage.linkageType_01), blockLinkage.linkageType_01)
-  blockLinkage.switch_confidence =  log10(blockLinkage.switch_posterior_prob) - log10(1-blockLinkage.switch_posterior_prob)
+  blockLinkage.linkage_confidence = log10(blockLinkage.switch_posterior_prob) - log10(1-blockLinkage.switch_posterior_prob)
+  echo "blockLinkage.linkage_confidence : " & $blockLinkage.linkage_confidence
   return blockLinkage
   
 # contig_blocks, the blocks to build contigs. Rest of the blocks are filled in if can
@@ -115,7 +118,7 @@ proc build_contig*(contig_blocks:seq[int], cellBlockLeftPhaseTable: TableRef[str
   var blockLinkage:Block_linkage
   for b in 0..(contig_blocks.len-2):
     blockLinkage = find_linkage(block1=contig_blocks[b], block2=contig_blocks[b+1],cellBlockLeftPhaseTable,cellBlockRightPhaseTable)
-    if blockLinkage.switch_confidence < 0.0:
+    if blockLinkage.linkage_confidence < 0.0:
       blockLinkage.switch = 0
     else: 
       blockLinkage.switch = 1
@@ -127,12 +130,12 @@ proc infer_block_linkage_to_one_contig_block*(contig_block:int,to_infer_block:in
                                              cellBlockRightPhaseTable: TableRef[string, seq[int]],
                                              at_left: bool):int = 
   if at_left:
-    if debug: echo "linking " & $to_infer_block & "to contig block " & $contig_block & " from left"
+    if debug: echo "linking " & $to_infer_block & " to contig block " & $contig_block & " from left"
     var blockLinkage = Block_linkage(prevBlock : to_infer_block, nextBlock :contig_block, switch : -1) 
     blockLinkage = find_linkage(block1 = to_infer_block, block2 = contig_block, cellBlockLeftPhaseTable,cellBlockRightPhaseTable)
-    if abs(blockLinkage.switch_confidence) < confidence_threshold:
+    if abs(blockLinkage.linkage_confidence) < confidence_threshold:
       return -1
-    elif blockLinkage.switch_confidence > 0:
+    elif blockLinkage.linkage_confidence > 0:
       return 1
     else:
       return 0
@@ -140,9 +143,9 @@ proc infer_block_linkage_to_one_contig_block*(contig_block:int,to_infer_block:in
     if debug: echo "linking " & $to_infer_block & "to contig block " & $contig_block & " from right"
     var blockLinkage = Block_linkage(prevBlock: contig_block, nextBlock : to_infer_block , switch: -1) 
     blockLinkage = find_linkage(block1 = contig_block, block2 = to_infer_block, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
-    if abs(blockLinkage.switch_confidence) < confidence_threshold:
+    if abs(blockLinkage.linkage_confidence) < confidence_threshold:
       return -1
-    elif blockLinkage.switch_confidence > 0:
+    elif blockLinkage.linkage_confidence > 0.0 :
       return 1
     else:
       return 0
@@ -157,29 +160,42 @@ proc infer_non_contig_blocks*(contig_blocks:seq[int], linkedContigs:seq[Block_li
   var blocks_phase = newSeqWith(blocks.len,-1)
   var left_contig_block:int
   var right_contig_block: int 
+  if debug: echo "now try loop blocks_phase len " & $blocks_phase.len
+  if debug: echo "now try loop contig_blocks len contig_blocks.len " & $contig_blocks.len
+
   blocks_phase[contig_blocks[0]] = 0
-  for lc in linkedContigs:
-    if lc.switch == 0:
-      if debug: echo "blocks_phase[lc.nextBlock] " & $blocks_phase[lc.nextBlock]
-      
-      blocks_phase[lc.nextBlock] = blocks_phase[lc.prevBlock] 
-    else:
-      blocks_phase[lc.nextBlock] = (1 xor blocks_phase[lc.prevBlock])  
+  if debug: echo "now try loop linkedContigs error due to len " & $linkedContigs.len
+
+  try:
+    for lc in linkedContigs:
+      if lc.switch == 0:
+        if debug: echo "blocks_phase[lc.nextBlock] " & $blocks_phase[lc.nextBlock]
+        blocks_phase[lc.nextBlock] = blocks_phase[lc.prevBlock] 
+      else:
+        blocks_phase[lc.nextBlock] = (1 xor blocks_phase[lc.prevBlock])  
+  except:
+    if debug: echo "loop linkedContigs error due to len " & $linkedContigs.len
+
   if debug: echo "block contig haplotype : " & $blocks_phase
+  if debug: echo " contig blocks are  : " & $contig_blocks
+  
   for b_j in 0..(blocks.len-1):
     if not (b_j in contig_blocks):
       if b_j < contig_blocks[0]:
         hap_sw = infer_block_linkage_to_one_contig_block(contig_block = contig_blocks[0], to_infer_block = b_j, cellBlockLeftPhaseTable,cellBlockRightPhaseTable, at_left=true)
         if hap_sw == 1:
-          blocks_phase[b_j] = (1 xor contig_blocks[0]) 
+          if debug: echo "hap_sw == 1"
+          blocks_phase[b_j] = (1 xor blocks_phase[contig_blocks[0]]) 
         elif hap_sw == 0:
-          blocks_phase[b_j] = contig_blocks[0] 
+          if debug: echo "hap_sw == 0"
+          blocks_phase[b_j] = blocks_phase[contig_blocks[0]] 
+          if debug: echo "blocks_phase[b_j] " & $blocks_phase[b_j]
       elif b_j > contig_blocks[contig_blocks.len-1]:
         hap_sw = infer_block_linkage_to_one_contig_block(contig_block = contig_blocks[contig_blocks.len-1], to_infer_block = b_j,cellBlockLeftPhaseTable,cellBlockRightPhaseTable,at_left=false)
         if hap_sw == 1:
-          blocks_phase[b_j] = (1 xor contig_blocks[0]) 
+          blocks_phase[b_j] = (1 xor blocks_phase[contig_blocks[contig_blocks.len-1]]) 
         elif hap_sw == 0:
-          blocks_phase[b_j] = contig_blocks[0] 
+          blocks_phase[b_j] =  blocks_phase[contig_blocks[contig_blocks.len-1]] 
       else:
         ## in between two contig blocks
         left_contig_block = b_j
@@ -196,11 +212,21 @@ proc infer_non_contig_blocks*(contig_blocks:seq[int], linkedContigs:seq[Block_li
               blocks_phase[b_j] = blocks_phase[right_contig_block]
             elif hap_sw_left == 0:
               blocks_phase[b_j] = blocks_phase[left_contig_block]
-          elif hap_sw_right == 0 and (hap_sw_left == 1 or hap_sw_left == -1):
+          elif hap_sw_right == 0 and (hap_sw_left in [1,-1]):
               blocks_phase[b_j] = blocks_phase[right_contig_block]
-          elif hap_sw_right == 1 and (hap_sw_left == 0 or hap_sw_left == -1):
+          elif hap_sw_right == 1 and (hap_sw_left in [0,-1]):
+              blocks_phase[b_j] = blocks_phase[left_contig_block]
+        elif blocks_phase[left_contig_block] == blocks_phase[right_contig_block]:
+          if (hap_sw_right == -1 and hap_sw_left == 1) or (hap_sw_right == 1 and hap_sw_left == -1) :
+              blocks_phase[b_j] = (1 xor blocks_phase[left_contig_block])
+          elif (hap_sw_right == -1 and hap_sw_left == 0) or (hap_sw_right == 0 and hap_sw_left == -1) :
+              blocks_phase[b_j] = blocks_phase[left_contig_block]
+          elif hap_sw_right == 0 and hap_sw_left == 0:
               blocks_phase[b_j] = blocks_phase[left_contig_block]
+          elif hap_sw_right == 1 and hap_sw_left == 1:
+              blocks_phase[b_j] =  (1 xor blocks_phase[left_contig_block])
     else: continue 
+    if debug: echo "After infer non contig blocks: " & $blocks_phase
   return blocks_phase 
 
 proc write_linked_blocks*(vcfFile:string, outFile: string, blocks:seq[Block], blocks_phase:seq[int], threads:int):int = 
@@ -233,20 +259,20 @@ proc write_linked_blocks*(vcfFile:string, outFile: string, blocks:seq[Block], bl
         # don't need to flip
         if blocks[current_block].h0[block_pos_i] == 0:
           ## 0|1
-          if debug: echo "block hap0 and this h0 is 0 0|1"
+          #if debug: echo "block hap0 and this h0 is 0 0|1"
           gt_string = @[int32(2),int32(5)]
         else:
           ## 1|0
-          if debug: echo "block hap0 and this h0 is 1 1|0"
+          #if debug: echo "block hap0 and this h0 is 1 1|0"
           gt_string = @[int32(4),int32(3)]
       elif blocks_phase[current_block] == 1:
         if blocks[current_block].h0[block_pos_i] == 0:
           ## 1|0
-          if debug: echo "block hap1 and this h0 is 0 1|0"
+          #if debug: echo "block hap1 and this h0 is 0 1|0"
           gt_string = @[int32(4),int32(3)]
         else:
           ## 0|1
-          if debug: echo "block hap1 and this h0 is 1 0|1"
+          #if debug: echo "block hap1 and this h0 is 1 0|1"
           gt_string = @[int32(2),int32(5)]
       if f.set("GT",gt_string) != Status.OK:
         quit "set GT failed"
diff --git a/private/utils.nim b/private/utils.nim
index a4843a15059d469023ab808eced13f6a2e63adaa..e155e2b26500bfb4654c4997c6a0d35d3307c3e0 100755
--- a/private/utils.nim
+++ b/private/utils.nim
@@ -2,6 +2,9 @@ from distributions/rmath import dbinom
 import math
 import tables
 import hts
+import algorithm
+import streams
+import strutils
 
 type 
   allele_expr* = ref object
@@ -9,6 +12,8 @@ type
     calt*: int
   ViState* = enum
     stateRef, stateAlt, stateN
+  BinaryGeno* = enum
+    gUnknown, gREF, gALT
   ViNode* = object
     pos*: int
     cRef*: int
@@ -20,12 +25,29 @@ type
     chrom*: string
     variantIndex*: int
     alleles*: string
-    genotype*: int
+    genotype*: BinaryGeno
   Fragment* = ref object
     cellbarcode*: string
     counter*: int
     node1*: GtNode
     node2*: GtNode
+
+type 
+  MtxRow* = tuple
+    rowIndex: int
+    colIndex: int
+    value: int
+type 
+  Mtx* = ref object
+    header*: string
+    dims*: string
+    lines*: seq[MtxRow]
+
+proc `$`*(x: BinaryGeno): string = 
+  if x == gUnknown: return "-1"
+  if x == gREF: return  "0"
+  if x == gALT: return  "1"
+
 proc get_base_offset*(position:int, align: Record): int =
   var 
     off = align.start.int
@@ -114,4 +136,49 @@ proc countAllele*(ibam:Bam, maxTotalReads:int,
         continue
   if total_reads > maxTotalReads or total_reads < minTotalReads:
     return initTable[string,allele_expr]()
-  return alleleCountTable
\ No newline at end of file
+  return alleleCountTable
+  
+proc readMtx*(mtx_file:string): Mtx = 
+  var mtxFileStream:FileStream
+  try :
+    mtxFileStream = openFileStream(mtx_file, fmRead)
+  except:
+    stderr.write getCurrentExceptionMsg()
+  var sparseMtx =  Mtx()
+  var current_line: string
+#  var entry: MtxRow
+  sparseMtx.header = mtxFileStream.readLine()
+  sparseMtx.dims = mtxFileStream.readLine()
+  while not mtxFileStream.atEnd():
+    current_line = mtxFileStream.readLine()
+    sparseMtx.lines.add((rowIndex: parseInt(current_line.splitWhitespace()[0]),
+                         colIndex: parseInt(current_line.splitWhitespace()[1]),
+                         value: parseInt(current_line.splitWhitespace()[2]) ))
+  mtxFileStream.close()
+
+  return sparseMtx
+# sort by col index
+proc compareMtxRow*(entry1: MtxRow, entry2: MtxRow): int = 
+  if entry1.colIndex < entry2.colIndex: 
+    return -1
+  elif entry1.colIndex == entry2.colIndex: 
+    if entry1.rowIndex <  entry2.rowIndex: 
+      return -1
+    else:
+      return 1
+  elif entry1.colIndex > entry2.colIndex: 
+    return 1
+
+proc sortWriteMtx*(sMtx:var Mtx, mtx_file:string):int = 
+  var mtxFileStream:FileStream
+  try :
+    mtxFileStream = openFileStream(mtx_file, fmWrite)
+  except:
+    stderr.write getCurrentExceptionMsg()
+  var current_line: string
+  sMtx.lines.sort(compareMtxRow)
+  mtxFileStream.writeLine(sMtx.header)
+  mtxFileStream.writeLine(sMtx.dims)
+  for entry in sMtx.lines:
+    mtxFileStream.writeLine(join([$entry.rowIndex, $entry.colIndex, $entry.value],sep = " "))
+  mtxFileStream.close()
diff --git a/sgcocaller.nim b/sgcocaller.nim
index fb06183fd25f9f3b11a7e4da57390d7d39c7093a..f015b97d8f141f4d099d33211db3ee343a75e348 100755
--- a/sgcocaller.nim
+++ b/sgcocaller.nim
@@ -25,11 +25,12 @@ proc getfmf(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outfmf:FileStream,maxTota
   var cellFragmentsTable = newTable[string,Fragment]()
 
   echo "generated fragments from " & $barcodeTable.len & " single cells"
-
+  var cellGtNodes: Table[string, GtNode]
   for rec in ivcf.query("*"): 
-    var cellGtNodes = findGtNodes(rec=rec, variantIndex= variantIndex, ibam=ibam, maxTotalReads=maxTotalReads,
-                                  minTotalReads=minTotalReads, mapq = mapq, barcodeTable = barcodeTable, minbsq=minbsq,barcodeTag=barcodeTag,
-                                  minCellDp = mindp, minSnpDepth = minsnpdepth)
+    cellGtNodes = findGtNodes(rec=rec, variantIndex = variantIndex, ibam=ibam, maxTotalReads=maxTotalReads,
+                                  minTotalReads=minTotalReads, mapq = mapq, barcodeTable = barcodeTable,
+                                  minbsq=minbsq,barcodeTag=barcodeTag)
+    cellGtNodes = callNodeGenotype(cellGtNodes,minCellDp = mindp, minSnpDepth = minsnpdepth, p0 = 0.3, p1 = 0.8)
     cellFragmentsTable = updateCellFragment(barcodedNodesTable=cellGtNodes,cellFragmentsTable = cellFragmentsTable, fmf_out = outfmf)              
     variantIndex = variantIndex + 1
   echo "processed " & $(variantIndex) & " variants"
@@ -52,6 +53,7 @@ proc phaseBlocks(fmf_file:string, inputVCF:string, outputVCF:string, block_hapfi
   ## 
   ## What if blocks cannot be linked ? the number of 11(00) == 10(01)
   echo "Total blocks " & $chr_blocks.len
+
   var contig_blocks:seq[int]
   for b_j, bl in chr_blocks:
     echo "block " & $b_j & " len: " & $bl.h0.len
@@ -66,8 +68,6 @@ proc phaseBlocks(fmf_file:string, inputVCF:string, outputVCF:string, block_hapfi
     echo "prevBlock" &  $bk.prevBlock & "nextBlock" &  $bk.nextBlock & " 00 type " & $ $bk.linkageType_00 &  " 01 type " & $bk.linkageType_01 &  " switch posterior prob " & $bk.switch_posterior_prob  & " switch value :" & $bk.switch
   phased_blocks = infer_non_contig_blocks(contig_blocks, blockkseq, chr_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
 
-  # echo phased_blocks
-
   discard  write_linked_blocks(inputVCF,outputVCF, blocks = chr_blocks, blocks_phase = phased_blocks, threads= threads)
   return 0
 proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:TableRef,
@@ -125,7 +125,8 @@ proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:TableRef,
           # if not gt == 0|1 continue
         elif not (ac[0] == 2 and ac[1] == 5): continue
       var alleleCountTable = countAllele(ibam=ibam, chrom=chrom, mapq=mapq, barcodeTable=barcodeTable,minbsq=minbsq,
-                                        maxTotalReads = maxtotal, minTotalReads = mintotal,bulkBam = bulkBam, barcodeTag = barcodeTag, startPos = rec.POS.cint-1, stopPos=rec.POS.cint,rec_alt =  rec_alt, rec_ref = rec_ref)
+                                        maxTotalReads = maxtotal, minTotalReads = mintotal,bulkBam = bulkBam, barcodeTag = barcodeTag,
+                                        startPos = rec.POS.cint-1, stopPos=rec.POS.cint,rec_alt =  rec_alt, rec_ref = rec_ref)
       if alleleCountTable.len==0: continue
 
       ## add to snpAnnoSeq, later write to SNPannot file, which contains SNP.pos, SNP.ref,SNP.alt; The rowAnnotations
@@ -208,7 +209,7 @@ Options:
   --maxDP <maxDP>  the maximum DP for a SNP to be included in the output file [default: 5]
   --maxTotalDP <maxTotalDP>  the maximum DP across all barcodes for a SNP to be included in the output file [default: 25]
   --minTotalDP <minTotalDP>  the minimum DP across all barcodes for a SNP to be included in the output file [default: 10]
-  --minSNPdepth <minSNPdepth>  the minimum depth of coverage for a SNPs to be includes in generated fragments [default: 2]
+  --minSNPdepth <minSNPdepth>  the minimum depth of coverage for a SNPs to be includes in generated fragments [default: 1]
   --thetaREF <thetaREF>  the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
   --thetaALT <thetaALT>  the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
   --cmPmb <cmPmb>  the average centiMorgan distances per megabases default 0.1 cm per Mb [default: 0.1]
@@ -242,7 +243,6 @@ Options:
     minsnpdepth:int
     out_vcf,fmf,barcodeFile,bamfile,vcff,hapfile:string
     vcfGtPhased = false
-
   echo $args
   threads = parse_int($args["--threads"])
   barcodeTag = $args["--barcodeTag"]
@@ -312,7 +312,15 @@ Options:
       echo "running for these chromosomes as specified in the header of the provided VCF file " & $s_Chrs
       discard sgcocaller(threads, ivcf, barcodeTable, ibam,
                       out_dir, mapq, minbsq, mintotal, 
-                      maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb,s_Chrs,barcodeTag,phased = vcfGtPhased)
+                      maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb, s_Chrs,barcodeTag,phased = vcfGtPhased)
+      var outFileTotalCountMtxFile, outFileAltCountMtxFile: string
+      ## sort entries in mtx files 
+      for chrom in s_Chrs:
+        outFileTotalCountMtxFile = out_dir & chrom & "_totalCount.mtx"
+        outFileAltCountMtxFile = out_dir & chrom & "_altCount.mtx"
+        for mtxFile in [outFileTotalCountMtxFile, outFileAltCountMtxFile]:
+          var imtx = readMtx(mtx_file = mtxFile)
+          discard sortWriteMtx(imtx, mtx_file = mtxFile)
     ibam.close()
     ivcf.close()
   if args["phase_blocks"]: