diff --git a/plotDiagnosticHap.R b/plotDiagnosticHap.R
new file mode 100644
index 0000000000000000000000000000000000000000..7b790211030e0756323010f473ddef2ae137c817
--- /dev/null
+++ b/plotDiagnosticHap.R
@@ -0,0 +1,54 @@
+## diagnostic txt to png
+
+## phase/\
+.libPaths("/mnt/beegfs/mccarthy/scratch/general/rlyu/Software/R/4.0/library/")
+
+args <- (commandArgs(trailingOnly = TRUE))
+
+for (i in seq_len(length(args))) {
+  eval(parse(text = args[[i]]))
+}
+
+print(pngfile)
+print(inputfile)
+print(snpAnnotFile)
+
+
+tryCatch(
+ {
+   library(ggplot2) 
+   library(dplyr)
+   library(tidyr)
+   message("Loading pacakges")
+  
+      # for the condition handlers for warnings and error below)
+  },
+  error=function(cond) {
+  
+    message("required R pacakges for plotting diagnostic plots are not available")
+
+    return(NA)
+  })
+
+plot_df <- read.table(file = inputfile,
+                      header = T)
+snpAnnot_df <- read.table(file = snpAnnotFile, header =TRUE)
+snpAnnot_df <- snpAnnot_df[snpAnnot_df$Phase!="-1",]
+plot_df_match <- apply(plot_df,2,function(x){
+  tmp <-  (x == plot_df$templateGeno)
+  tmp[x == 0] <-  NA
+  tmp
+})
+
+
+data.frame(cbind(plot_df_match,snpAnnot_df)) %>%
+  tidyr::pivot_longer(cols = colnames(plot_df)) %>%
+  dplyr::filter(!is.na(value),POS > 1.3e8, POS < 1.35e8) %>%
+  ggplot()+
+  geom_point(mapping = aes(x = POS , y = name, color = value))+
+  scale_color_manual(values = c("FALSE"='red',
+                                "TRUE" = "blue"))+
+    theme_bw(base_size = 18)+geom_vline(mapping = aes(xintercept = c(switch_pos[2])),
+                                        color = "red",size = 1.2)
+
+ggsave(pngfile, dpi = 100, width = 14, height = 6)
diff --git a/private/cal_switch_score.nim b/private/cal_switch_score.nim
index e86b69e6d3ab59cc2245dc3dbd2be76c47f56b89..b83d4a7e9db582214560d829ae84658ea0ec2fea 100755
--- a/private/cal_switch_score.nim
+++ b/private/cal_switch_score.nim
@@ -1,9 +1,265 @@
 ## calcualte switch score for snps positions that are high risk
+import utils
+import sequtils
+import math
+import streams
+# bins of SNP indexes
+import strutils
 
 let binSize = 1000
-let movingStep = 200
-
+let movingStep = 500
+let dissimThresh = 0.0099 
+let lookBeyondSnps = 25
+let debug = false
+let switchPrior = 0.5
+type 
+  switchScoreTuple = tuple
+    switch_scores: seq[float]
+    switch_scores_snpIndex: seq[int]
+    
 # reduce the search space
 # return seq of snp indexes
+# low risk bins are bins that do not have crossover happening in majority of the cells. 
+
+## rough crossover estimates: return binary results, 0 means no crossovers, 1 means there is.
+## simply comparing the dissimilary btw template geno seq with cell's geno seq for the SNPs in the bin
+
+## cell_geno snp by cell
+proc hasCrossover(templ_geno:seq[BinaryGeno], cell_geno: seq[seq[BinaryGeno]]): bool = 
+  let ncells = cell_geno[0].len
+  var ncompared = newSeq[int](ncells)
+  var nmatch = newSeq[int](ncells)
+  for snpi,g in templ_geno:
+    for cellj in 0..(ncells-1):
+      if g != gUnknown and cell_geno[snpi][cellj] != gUnknown:
+        ncompared[cellj].inc
+        if g == cell_geno[snpi][cellj]: nmatch[cellj].inc
+  if debug: echo "ncompared " & $ncompared      
+  let dissim = map(toSeq(0..(ncells-1)), proc(x:int):float =  nmatch[x]/ncompared[x])
+  if debug: echo "dissim " & $dissim
+  var ncrossover = map(dissim, proc(x:float): int = (int)(x > dissimThresh and x < (1-dissimThresh)))
+  if debug: echo "ncrossover " & $ncrossover
+  let nxcells = foldl(ncrossover, a + b)
+  if nxcells < int(floor(0.5 * float(ncells))):
+    if debug: echo "bin had no crossovers : " & $nxcells
+    return false
+  else:
+    if debug: echo "bin had many crossovers : " & $nxcells
+    return true
+
+proc findHighRiskSnps(fullGeno:seq[BinaryGeno], gtMtxByCell:seq[seq[BinaryGeno]]): seq[int] = 
+  #let ncells = gtMtxByCell[0].len
+  let nSnps = gtMtxByCell.len
+  let nBins = (int)(ceil(nSnps / (binSize - movingStep)))
+  if debug: echo "nBins: " & $nBins
+  let binIds = toSeq(0..(nBins-1))
+  var snpPosStart,snpPosEnd: int
+  var highRiskSnps: seq[int]
+  for binI in binIds:
+    if debug: echo "binI " & $binI
+    snpPosStart = (binI)*(binSize - movingStep)
+    if (snpPosStart + binSize) > (nSnps-1): 
+      snpPosEnd = (nSnps-1)
+    else:
+      snpPosEnd = snpPosStart + binSize
+    if (hasCrossover(templ_geno =  fullGeno[snpPosStart..snpPosEnd], 
+                     cell_geno =  gtMtxByCell[snpPosStart..snpPosEnd] )):
+      highRiskSnps = concat(highRiskSnps,toSeq(snpPosStart..snpPosEnd))
+
+    
+  # add a last bin as from end of chrom with length binSize
+  let lastBinStart = nSnps - binSize
+  let lastBinEnd = nSnps - 1
+  if (hasCrossover(templ_geno =  fullGeno[snpPosStart..snpPosEnd], 
+                     cell_geno =  gtMtxByCell[snpPosStart..snpPosEnd])):
+    highRiskSnps = concat(highRiskSnps,toSeq(lastBinStart..lastBinEnd))
+
+  highRiskSnps = deduplicate(highRiskSnps)
+  return highRiskSnps
+   
+proc readPhasedSnpAnnot(phasedSnpAnnotFileStream: FileStream, nSnps:int): seq[BinaryGeno] =
+  var fullGeno = newSeq[BinaryGeno](nSnps)
+  var i = 0
+  var currentEntrySeq: seq[string]
+  while not phasedSnpAnnotFileStream.atEnd():
+    currentEntrySeq = phasedSnpAnnotFileStream.readLine().splitWhitespace()
+    fullGeno[i] = parseInt(currentEntrySeq[3]).toBinaryGeno(format = "01")
+    i.inc
+  if i != nSnps :
+    quit "phased snpAnnot file does not have the same number of rows with gtMtx"
+  return fullGeno
+
+proc calculateProbslog10(hap:seq[BinaryGeno],cell_hap:seq[BinaryGeno], error_rate = 0.1) : float = 
+  var nmatch,ncompare:int
+  for i,g in hap:
+    if (g != gUnknown) and (cell_hap[i] != gUnknown):
+      ncompare.inc
+      if (g == cell_hap[i]): nmatch.inc
+  let nmis = ncompare - nmatch
+  let prob = 0.5*(error_rate^nmis*(1-error_rate)^nmatch + error_rate^nmatch*(1-error_rate)^nmis)
+  return(log10(prob))
+
+proc countNmatch( hap:seq[BinaryGeno],cell_hap:seq[BinaryGeno],nmatch =true): int = 
+  var nmatch,ncompare:int
+  for i,g in hap:
+    if (g != gUnknown) and (cell_hap[i] != gUnknown):
+      ncompare.inc
+      if (g == cell_hap[i]): nmatch.inc
+  let nmis = ncompare - nmatch
+  return nmatch
+
+proc countNMis( hap:seq[BinaryGeno],cell_hap:seq[BinaryGeno],nmatch =true): int = 
+  var nmatch,ncompare:int
+  for i,g in hap:
+    if (g != gUnknown) and (cell_hap[i] != gUnknown):
+      ncompare.inc
+      if (g == cell_hap[i]): nmatch.inc
+  let nmis = ncompare - nmatch
+  return nmis
+
+proc binarySwitch(x: BinaryGeno):BinaryGeno = 
+    if(x == gUnknown):  gUnknown 
+    elif(x == gREF):gALT
+    else: gREF
+
+proc switchHap(hap: seq[BinaryGeno]): seq[BinaryGeno] = 
+  map(hap, binarySwitch)
+
+proc getIthCellHap(gtMtxByCell:seq[seq[BinaryGeno]],cellIndex:int):seq[BinaryGeno] = 
+  map(gtMtxByCell, proc(y: seq[BinaryGeno]): BinaryGeno = y[cellIndex])
+
+proc calSwitchScore(riskySnps: seq[int], gtMtxByCell:seq[seq[BinaryGeno]], fullGeno: seq[BinaryGeno]): switchScoreTuple = 
+  var letfIndexStart,letfIndexEnd,rightIndexStart,rightIndexEnd,offset: int
+  let nSnps = gtMtxByCell.len
+  let ncells =  gtMtxByCell[0].len
+  var hswitch,cell_hap,cell_full_hap,templ_geno,templ_geno_left,templ_geno_right: seq[BinaryGeno]
+  var prob_switch, prob_nonsw: float
+  var leftIndex,rightIndex, switch_score_snpIndex: seq[int]
+  var swscore = newSeq[float](nSnps)
+  for rsnpi in riskySnps:
+    prob_switch = 0.0
+    prob_nonsw = 0.0
+    if fullGeno[rsnpi] == gUnknown: continue
+    for celli in 0..(ncells-1):
+      # if celli == 2: continue
+      leftIndex = newSeq[int]()
+      rightIndex = newSeq[int]()
+      offset = 1
+      cell_full_hap = getIthCellHap(gtMtxByCell, celli)
+      if(cell_full_hap[rsnpi] != gUnknown and fullGeno[rsnpi] != gUnknown ):
+        rightIndex.add(rsnpi)
+      ## need to find the nearest coexisting N snps (N = lookBeyondSnps*2)
+      offset = 1
+      while(leftIndex.len < lookBeyondSnps and ((rsnpi-offset) > 0)):
+        if((cell_full_hap[rsnpi-offset] != gUnknown) and (fullGeno[rsnpi-offset] != gUnknown)):
+          leftIndex.add(rsnpi-offset)
+        offset.inc
+      offset = 1
+      while(rightIndex.len < lookBeyondSnps and ((rsnpi+offset) < nSnps)):
+        if((cell_full_hap[rsnpi+offset] != gUnknown) and (fullGeno[rsnpi+offset] != gUnknown)):
+          rightIndex.add(rsnpi+offset)
+        offset.inc
+      cell_hap = concat(map(concat(leftIndex,rightIndex), proc(x:int):BinaryGeno = cell_full_hap[x])) 
+      templ_geno_left = map(leftIndex, proc(x:int):BinaryGeno = fullGeno[x])
+      templ_geno_right = map(rightIndex, proc(x:int):BinaryGeno = fullGeno[x])
+      templ_geno = concat(templ_geno_left,templ_geno_right)
+      hswitch = concat(templ_geno_left,switchHap(templ_geno_right))
+      # if rsnpi == 16933:
+      #   echo "cell " & $celli
+      #   echo "cell_hap " & $cell_hap
+      #   echo "templ_geno " & $templ_geno
+      #   echo "hswitch " & $hswitch
+      #   echo "prev prob nosw " & $prob_nonsw
+      #   echo "prev prob_switch " & $prob_switch   
+      #   echo "nmatch temp_geno " & $(countNmatch(hap = templ_geno, cell_hap = cell_hap))
+      #   echo "nmatch hswitch " & $(countNmatch(hap = hswitch, cell_hap = cell_hap))
+      #   echo "nmis temp_geno " & $(countNMis(hap = templ_geno, cell_hap = cell_hap))
+      #   echo "nmis hswitch " & $(countNMis(hap = hswitch, cell_hap = cell_hap))    
+      prob_nonsw = prob_nonsw + calculateProbslog10(hap = templ_geno, cell_hap = cell_hap)
+      prob_switch = prob_switch + calculateProbslog10(hap = hswitch, cell_hap = cell_hap)
+
+    #if(prob_switch > prob_nonsw): echo "found positive switch score"
+    swscore[rsnpi] =  log10(switchPrior) + prob_switch - log10(1-switchPrior) - prob_nonsw
+    switch_score_snpIndex.add(rsnpi)
+  return (swscore,switch_score_snpIndex)
+
+## return SNP indexs to switch
+## switchScore, a dense list of switch scores
+proc findSwitchSites(switchScoreT: switchScoreTuple): seq[int] = 
+  var sitesToSwitch = newSeq[int]()
+  var positiveScores: seq[float]
+  var positiveScoresIndex: seq[int]
+  
+  var inPositiveBlock = false
+  for site, score in switchScoreT.switch_scores:
+    if switchScoreT.switch_scores_snpIndex.find(site)>=0:
+      if score <= 0.0 :
+        if inPositiveBlock:
+          inPositiveBlock = false
+          if positiveScoresIndex.len > int(floor(lookBeyondSnps/2)):
+            sitesToSwitch.add(positiveScoresIndex[maxIndex(positiveScores)])
+          continue
+      elif not inPositiveBlock:
+        positiveScores = @[score]
+        positiveScoresIndex = @[site]
+        inPositiveBlock = true
+        continue
+      else:
+        positiveScores.add(score)
+        positiveScoresIndex.add(site)
+  return sitesToSwitch
+let vcf_file = "data/swapped/FVB_NJ.mgp.v5.snps.dbSNP142.homo.alt.modified.swapped.GT.chr1.vcf.gz"
+let bam_file = "data/WC_CNV_43/WC_CNV_43.bam"
+let barcode_file = "data/WC_CNV_43/WC_CNV_43_filteredBC.tsv"
+var
+  gtMtxFileStream,phaseSnpAnnotFileStream,switchScoreFileStream:FileStream
+
+
+let s_Chrs = map(toSeq(1..19), proc(x:int):string = "chr" & $x)
+
+var gtMtxByCell: seq[seq[BinaryGeno]]
+var currentEntrySeq: seq[string]
+var currentEntry: seq[int]
+var nSnps,ncells:int
+## sort entries in mtx files 
+for chrom in s_Chrs:    
+  let gtMtxFile = "data/WC_CNV_43/sgcocaller/gtMtx/" & chrom & "_gtMtx.mtx"
+  let phaseSnpAnnotFile = "data/WC_CNV_43/sgcocaller/phase/" & chrom & "_phased_snpAnnot.txt"
+  let switchScoreFile = "data/WC_CNV_43/sgcocaller/phase/" & chrom & "_switch_score.txt"  
+  try: 
+    gtMtxFileStream = openFileStream(gtMtxFile, fmRead)
+    phaseSnpAnnotFileStream = openFileStream(phaseSnpAnnotFile, fmRead)
+    switchScoreFileStream = openFileStream(switchScoreFile, fmWrite)
+    
+  except:
+    stderr.write getCurrentExceptionMsg() 
+  echo "reading gtMtx to by cell"
+  discard gtMtxFileStream.readLine()
+  discard phaseSnpAnnotFileStream.readLine()
+  #N, i,j
+  currentEntrySeq = gtMtxFileStream.readLine().splitWhitespace()
+  currentEntry = map(currentEntrySeq, proc(x: string): int = parseInt(x))
+  nSnps = currentEntry[0]
+  echo "nSnps " & $nSnps
+  ncells = currentEntry[1]
+  echo "ncells " & $ncells
+
+  echo "totalEntries " & $ currentEntry[2]
+  ## gtMtx is cell by Snp format
+  gtMtxByCell = newSeqWith(nSnps,newSeq[BinaryGeno](ncells))
+  discard readGtMtxToSeq(mtxFileStream = gtMtxFileStream, gtMtx = gtMtxByCell, by_cell = true)
+  var fullGeno = readPhasedSnpAnnot(phasedSnpAnnotFileStream = phaseSnpAnnotFileStream, nSnps = nSnps )
 
-proc findHighRiskSnps(fullGeno:seq[int]): seq[int] = 
\ No newline at end of file
+  var riskySnps = findHighRiskSnps(fullGeno = fullGeno, gtMtxByCell = gtMtxByCell)
+  echo "riskySnps.len " & $riskySnps.len
+  var switchScoresT = calSwitchScore(riskySnps = riskySnps, gtMtxByCell =gtMtxByCell, fullGeno = fullGeno)
+  switchScoreFileStream.writeLine("#" & $findSwitchSites(switchScoresT))
+  echo  $findSwitchSites(switchScoresT)
+  for snpi,score in switchScoresT.switch_scores:
+    if switchScoresT.switch_scores_snpIndex.find(snpi)>=0:
+      switchScoreFileStream.writeLine($score)
+    else: 
+      switchScoreFileStream.writeLine("NA")
+  for fs in [gtMtxFileStream,phaseSnpAnnotFileStream,switchScoreFileStream]:
+    fs.close()
+ 
diff --git a/private/findGtNodes.nim b/private/findGtNodes.nim
new file mode 100755
index 0000000000000000000000000000000000000000..e3b4ea683cdbb9ad1a67467735943542e6b63765
--- /dev/null
+++ b/private/findGtNodes.nim
@@ -0,0 +1,57 @@
+## Genrate GtNode per variant (SNP)
+
+import tables
+import hts
+import utils
+
+# minSnpDepth = 2, this SNP should at least be covered by two cells
+proc findGtNodes*(rec:Variant, variantIndex: int, ibam:Bam, maxTotalReads:int, minTotalReads:int, mapq: int,
+                  barcodeTable:TableRef,
+                  minbsq:int,
+                  barcodeTag:string ): Table[string,GtNode] =
+
+  var barcodedNodesTable = initTable[string,GtNode]()
+  var rec_alt:char
+  var total_reads = 0
+  rec_alt = rec.ALT[0][0]
+  for aln in ibam.query(chrom = $rec.CHROM,start = rec.POS.cint-1, stop = rec.POS.cint):
+    var cbt = tag[string](aln, barcodeTag)
+    var currentCB: string
+    var base_off: int
+    var base: char
+    if cbt.isNone:
+      continue  
+    else:
+      currentCB = cbt.get  
+    if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue
+    ## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
+    ## if not aln.flag.proper_pair: continue
+    if not barcodeTable.hasKey(currentCB): 
+      continue
+    base_off = get_base_offset(position = rec.POS.cint, align = aln) 
+    base = aln.base_at(base_off)
+    if aln.base_quality_at(base_off).cint < minbsq: 
+      continue
+    total_reads+=1
+    if barcodedNodesTable.hasKey(currentCB):
+      if base == rec.REF[0]: 
+     #   echo "adding ref geno to cell " & currentCB
+        barcodedNodesTable[currentCB].alleles = add_allele(barcodedNodesTable[currentCB],alleleBinGeno = 0)
+        continue
+      if base == rec_alt: 
+        barcodedNodesTable[currentCB].alleles = add_allele(barcodedNodesTable[currentCB],alleleBinGeno = 1)
+        continue
+    else:
+    #  echo "creating new node for cell " & currentCB
+      var newGtNode = GtNode(chrom: $rec.CHROM, variantIndex: variantIndex)
+      if base == rec.REF[0]: 
+        newGtNode.alleles = "0"
+        barcodedNodesTable[currentCB] = newGtNode
+        continue
+      if base == rec_alt: 
+        newGtNode.alleles = "1"
+        barcodedNodesTable[currentCB] = newGtNode
+        continue
+  if total_reads > maxTotalReads or total_reads < minTotalReads:
+    return initTable[string,GtNode]()
+  return barcodedNodesTable  
diff --git a/private/find_template_cell.nim b/private/find_template_cell.nim
index 41478f3105b4affa3176f83f60e060f13b43f832..ed743a6ec7237842af8bf0f8c9818935ff7ac1bf 100755
--- a/private/find_template_cell.nim
+++ b/private/find_template_cell.nim
@@ -18,24 +18,6 @@ let
   minCoexit = 1000
   maxDissim = 0.0099
 
-var nsnps,ncells,totalEntries:int
-
-proc toBinaryGeno(x: int) : BinaryGeno = 
-  if x == 1: return gREF
-  if x == 2: return gALT
-  quit "mtx geno is not 1 or 2"
-
-proc readGtMtx*(mtxFileStream:FileStream, gtMtx:var seq[seq[BinaryGeno]]):int = 
-  var currentEntrySeq:seq[int]
-  var currentLine:seq[string]
-
-  while not mtxFileStream.atEnd():
-    currentLine = mtxFileStream.readLine().splitWhitespace()
-    ## i j 1-based from file
-    currentEntrySeq = map(currentLine, proc(x: string): int = parseInt(x))
-    gtMtx[(currentEntrySeq[1]-1)][(currentEntrySeq[0]-1)] = currentEntrySeq[2].toBinaryGeno
-  return 0
-
 proc countCoexistMatch(seq1: seq[BinaryGeno], seq2: seq[BinaryGeno]): seq[int] = 
   if not (seq1.len == seq2.len):
     quit "cells' genotypes under comparison do not have the same length"
@@ -43,7 +25,7 @@ proc countCoexistMatch(seq1: seq[BinaryGeno], seq2: seq[BinaryGeno]): seq[int] =
   for i, g in seq1:
     if g != gUnknown: cell1Snps += 1
     if (seq2[i] != gUnknown): cell2Snps += 1
-    if (g != gUnknown and seq2[i]!= gUnknown):
+    if (g != gUnknown and seq2[i] != gUnknown):
       ncoexist += 1
       if(g == seq2[i]):
         nmatch += 1
@@ -71,7 +53,6 @@ proc findCellPairs(gtMtx:seq[seq[BinaryGeno]],nPairs = 3, nSnpsPercell: var seq[
 proc findCellBynSNPs*(nSnpsPercell:seq[int],q = 0.75): int = 
   # return the selected cells' index
   # not too many SNPs or too few
-  var chosed:int
   var seqNsnps = nSnpsPercell
   sort(seqNsnps, system.cmp[int])
   int(floor(float(seqNsnps.len) * q))
@@ -79,7 +60,7 @@ proc findCellBynSNPs*(nSnpsPercell:seq[int],q = 0.75): int =
 proc selectTemplateCell*(gtMtx:seq[seq[BinaryGeno]], nPairs = 3): int = 
   var nSnpsCells = newSeq[int](gtMtx.len)
   var cellPairs = findCellPairs(gtMtx = gtMtx, nPairs = nPairs, nSnpsPercell = nSnpsCells)
-  echo "cellPairs"
+  echo "cellPairs: "
   echo cellPairs
   var selectedCell, icp: int
   if cellPairs[0].coexistSnps == 0 :
@@ -94,44 +75,3 @@ proc selectTemplateCell*(gtMtx:seq[seq[BinaryGeno]], nPairs = 3): int =
   else:
     echo "selected cell: " & $cellPairs[icp].cell2 & " nSnps: " & $nSnpsCells[cellPairs[icp].cell2]
     return cellPairs[icp].cell2
-
-var gtMtx:seq[seq[BinaryGeno]]  
-var currentEntry:seq[int]
-var currentEntrySeq:seq[string]
-var gtMtxFileStream:FileStream
-## i, j are 1-based
-let mtxFile = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/WC_CNV_chr1_nonzero.mtx"
-echo mtxFile
-
-try: 
-  gtMtxFileStream = openFileStream(mtx_file, fmRead)
-except:
-  stderr.write getCurrentExceptionMsg()
-# %%MatrixMarket matrix coordinate integer general
-discard gtMtxFileStream.readLine()
-#N, i,j
-currentEntrySeq = gtMtxFileStream.readLine().splitWhitespace()
-currentEntry = map(currentEntrySeq, proc(x: string): int = parseInt(x))
-nsnps = currentEntry[0]
-echo "nsnps " & $nsnps
-ncells = currentEntry[1]
-echo "ncells " & $ncells
-
-totalEntries = currentEntry[2]
-
-echo "totalEntries " & $totalEntries
-
-
-gtMtx = newSeqWith(ncells,newSeq[BinaryGeno](nsnps))
-
-discard readGtMtx(gtMtxFileStream,gtMtx)
-#var colIndex = 4
-#echo gtMtx[0][27..40]
-
-#echo sliceColumn(gtMtx,56)
-#echo gtMtx.len
-
-#echo findCellPairs(gtMtx = gtMtx)
-#var nSnpsCells = newSeqWith(ncells,0)
-#selectTemplateCell
-echo selectTemplateCell(gtMtx = gtMtx, nPairs =3)
\ No newline at end of file
diff --git a/private/genotype_mtx.nim b/private/genotype_mtx.nim
index a935cb1e645cb349f6c4017effb65e942c98089f..9c11d79009aac6a9b5e2054f11338bee6d153386 100755
--- a/private/genotype_mtx.nim
+++ b/private/genotype_mtx.nim
@@ -13,15 +13,21 @@ import findGtNodes
 ## these outputs will be per chromosome
 
 
-proc getGtMtx(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outGtMtx:FileStream, outSnpAnnot:FileStream, 
+proc getGtMtx*(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outGtMtx:FileStream, outSnpAnnot:FileStream, 
               outTotalCountMtx:FileStream, outAltCountMtx:FileStream,maxTotalReads:int,
               minTotalReads:int,  mapq: int,minbsq:int, barcodeTag:string, minsnpdepth:int,minCellDp:int, maxCellDp:int,p0 = 0.3, p1 = 0.8):int = 
   var variantIndex = 1
   var writtenVarintIndex = 1
   var calt,cellDP,geno,cellIndex,nnsize = 0
-  var delBarcodes:seq[string]
-  var cellGtNodes: Table[string, GtNode]
-  var cellGtNodeCopy: Table[string, GtNode]
+  var cellGtNodes,cellGtNodeCopy: Table[string, GtNode]
+    
+  ## write headers to those mtx files and the first line place holder for total_row total_column total_entry
+  let sparseMatrixHeader = "%%MatrixMarket matrix coordinate integer general"
+
+  for outFileStream in [outGtMtx,outTotalCountMtx,outAltCountMtx]:
+    outFileStream.writeLine(sparseMatrixHeader)
+    outFileStream.writeLine(' '.repeat(50))
+  outSnpAnnot.writeLine(join(["POS","REF", "ALT"], sep = "\t"))
   for rec in ivcf.query("*"): 
     var rec_alt = rec.ALT[0][0]
     var rec_ref = rec.REF[0]
@@ -59,80 +65,72 @@ proc getGtMtx(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outGtMtx:FileStream, ou
         nnsize.inc
       outSnpAnnot.writeLine(join([$rec.POS, $rec_ref, $rec_alt], sep="\t") )
       writtenVarintIndex.inc
-    ## write to matrices
+    ## write to matrices 
           
   echo "processed " & $(variantIndex) & " variants"
-  echo "wrote " & $(writtenVarintIndex) & " variants to matrices and snpAnnot file"
+  echo "wrote " & $(writtenVarintIndex-1) & " variants to matrices and snpAnnot file"
   for outFileStream in [outGtMtx,outTotalCountMtx,outAltCountMtx]:
     outFileStream.setPosition(49)
-    outFileStream.write($(writtenVarintIndex) & " " & $barcodeTable.len & " " & $nnsize) 
-  for outFileStream in[outGtMtx,outTotalCountMtx,outAltCountMtx, outSnpAnnot]:
+    outFileStream.write($(writtenVarintIndex-1) & " " & $barcodeTable.len & " " & $nnsize) 
+  for outFileStream in[outGtMtx, outTotalCountMtx, outAltCountMtx, outSnpAnnot]:
     outFileStream.close()
   return 0
 
-let vcf_file = "data/swapped/FVB_NJ.mgp.v5.snps.dbSNP142.homo.alt.modified.swapped.GT.chr1.vcf.gz"
-let bam_file = "data/WC_CNV_42/WC_CNV_42.bam"
-let barcode_file = "data/WC_CNV_42/WC_CNV_42_filteredBC.tsv"
-var
-  ibam:Bam
-  ivcf:VCF
-  outGtMtx, outSnpAnnot, outTotalCountMtx, outAltCountMtx:FileStream
-if not open(ibam, bam_file, threads=1, index = true):
-    quit "couldn't open input bam"
-if not open(ivcf, vcf_file, threads=1):
-    quit "couldn't open: vcf file"
-
-var hf = hts.hts_open(cstring(barcode_file), "r")
-#### TODO : Table size
-var barcodeTable =  newTable[string,int](initialSize = 1024)
-var kstr: hts.kstring_t
-kstr.l = 0
-kstr.m = 0
-kstr.s = nil
-var ithSperm = 0
-## initiate the table with CB as keys, allele counts (ref object) as elements
-while hts_getline(hf, cint(10), addr kstr) > 0:
-  if $kstr.s[0] == "#":
-    continue
-  var v = $kstr.s
-  discard barcodeTable.hasKeyOrPut(v, ithSperm)
-  ithSperm.inc
-discard hf.hts_close()
-
-let s_Chrs = @["chr1"]
-var chrom = "chr1"
-let out_prefix = "test_data/getMtx/"
-echo "generating gtMtx"
-try: 
-  outGtMtx = openFileStream(out_prefix & chrom &  "_gtMtx.mtx", fmWrite)
-  outSnpAnnot = openFileStream(out_prefix & chrom &  "_snpAnnot.txt", fmWrite)
-  outTotalCountMtx = openFileStream(out_prefix & chrom &  "_totalMtx.mtx", fmWrite)
-  outAltCountMtx = openFileStream(out_prefix & chrom &  "_AltMtx.mtx", fmWrite)
-except:
-  stderr.write getCurrentExceptionMsg()
+# let vcf_file = "data/swapped/FVB_NJ.mgp.v5.snps.dbSNP142.homo.alt.modified.swapped.GT.chr1.vcf.gz"
+# let bam_file = "data/WC_CNV_42/WC_CNV_42.bam"
+# let barcode_file = "data/WC_CNV_42/WC_CNV_42_filteredBC.tsv"
+# var
+#   ibam:Bam
+#   ivcf:VCF
+#   outGtMtx, outSnpAnnot, outTotalCountMtx, outAltCountMtx:FileStream
+# if not open(ibam, bam_file, threads=1, index = true):
+#     quit "couldn't open input bam"
+# if not open(ivcf, vcf_file, threads=1):
+#     quit "couldn't open: vcf file"
 
-## write headers to those mtx files and the first line place holder for total_row total_column total_entry
-let sparseMatrixHeader = "%%MatrixMarket matrix coordinate integer general"
+# var hf = hts.hts_open(cstring(barcode_file), "r")
+# #### TODO : Table size
+# var barcodeTable =  newTable[string,int](initialSize = 1024)
+# var kstr: hts.kstring_t
+# kstr.l = 0
+# kstr.m = 0
+# kstr.s = nil
+# var ithSperm = 0
+# ## initiate the table with CB as keys, allele counts (ref object) as elements
+# while hts_getline(hf, cint(10), addr kstr) > 0:
+#   if $kstr.s[0] == "#":
+#     continue
+#   var v = $kstr.s
+#   discard barcodeTable.hasKeyOrPut(v, ithSperm)
+#   ithSperm.inc
+# discard hf.hts_close()
 
-for outFileStream in [outGtMtx,outTotalCountMtx,outAltCountMtx]:
-  outFileStream.writeLine(sparseMatrixHeader)
-  outFileStream.writeLine(' '.repeat(50))
+# let s_Chrs = @["chr1"]
+# var chrom = "chr1"
+# let out_prefix = "test_data/getMtx/"
+# echo "generating gtMtx"
+# try: 
+#   outGtMtx = openFileStream(out_prefix & chrom &  "_gtMtx.mtx", fmWrite)
+#   outSnpAnnot = openFileStream(out_prefix & chrom &  "_snpAnnot.txt", fmWrite)
+#   outTotalCountMtx = openFileStream(out_prefix & chrom &  "_totalMtx.mtx", fmWrite)
+#   outAltCountMtx = openFileStream(out_prefix & chrom &  "_AltMtx.mtx", fmWrite)
+# except:
+#   stderr.write getCurrentExceptionMsg()
 
-outSnpAnnot.writeLine(join(["POS","REF", "ALT"], sep = "\t"))
 
-discard getGtMtx(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outGtMtx = outGtMtx, outTotalCountMtx = outTotalCountMtx ,outAltCountMtx = outAltCountMtx,
-                 outSnpAnnot = outSnpAnnot, maxTotalReads = 30,
-                 minTotalReads = 5, mapq = 20, minbsq = 13, minCellDp = 2,maxCellDp =10,barcodeTag = "CB",minsnpdepth=1)
+# discard getGtMtx(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outGtMtx = outGtMtx, outTotalCountMtx = outTotalCountMtx ,outAltCountMtx = outAltCountMtx,
+#                  outSnpAnnot = outSnpAnnot, maxTotalReads = 30,
+#                  minTotalReads = 5, mapq = 20, minbsq = 13, minCellDp = 2,maxCellDp =10,barcodeTag = "CB",minsnpdepth=1)
 
 
-var outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile : string
+# var outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile : string
 
-## sort entries in mtx files 
-for chrom in s_Chrs:
-  outGtMtxFile = out_prefix & chrom & "_gtMtx.mtx"
-  outTotalCountMtxFile = out_prefix & chrom &  "_totalMtx.mtx"
-  outAltCountMtxFile = out_prefix & chrom &  "_AltMtx.mtx"
-  for mtxFile in [outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile]:
-    var imtx = readMtx(mtx_file = mtxFile)
-    discard sortWriteMtx(imtx, mtx_file = mtxFile)
+# ## sort entries in mtx files 
+# for chrom in s_Chrs:
+#   outGtMtxFile = out_prefix & chrom & "_gtMtx.mtx"
+#   outTotalCountMtxFile = out_prefix & chrom &  "_totalMtx.mtx"
+#   outAltCountMtxFile = out_prefix & chrom &  "_AltMtx.mtx"
+#   for mtxFile in [outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile]:
+#     var imtx = readMtx(mtx_file = mtxFile)
+#     discard sortWriteMtx(imtx, mtx_file = mtxFile)
 
diff --git a/private/infer_missing_snps.nim b/private/infer_missing_snps.nim
index ab4174623d6846b85cd8999c16bc2f1e4346e457..b3874f7b52233a223347cb995aebfff20da429c8 100755
--- a/private/infer_missing_snps.nim
+++ b/private/infer_missing_snps.nim
@@ -2,14 +2,12 @@
 
 ## author Ruqian Lyu
 ## rlyu@svi.edu.au
-import find_template_cell
 import sequtils
 import math
-import streams
-import strutils
 import utils
 
 let nAnchorSnps = 10
+
 proc sliceColumn*(gtMtx:seq[seq[BinaryGeno]],colIndex:int): seq[BinaryGeno] = 
   map(gtMtx, proc(x:seq[BinaryGeno]):BinaryGeno = x[colIndex])
 
@@ -43,9 +41,8 @@ proc inferGeno(type00:int, type10:int, error_rate = 0.1,posterior_thresh = 0.98)
       return gALT
   return gUnknown
 # return full sequence of template geno by inferring missing SNPs's genotypes 
-proc inferSnpGeno(templateGeno: seq[BinaryGeno], gtMtx:seq[seq[BinaryGeno]], posterior_thresh = 0.99):seq[BinaryGeno] = 
+proc inferSnpGeno*(templateGeno: seq[BinaryGeno], gtMtx:seq[seq[BinaryGeno]], posterior_thresh = 0.99):seq[BinaryGeno] = 
   var fullGeno = templateGeno
-  let nCells = gtMtx.len
   let nSnps = gtMtx[0].len
   var coexisPos:seq[int]
   var snpLD: seq[float]
@@ -71,7 +68,8 @@ proc inferSnpGeno(templateGeno: seq[BinaryGeno], gtMtx:seq[seq[BinaryGeno]], pos
           if((gtMtx[j][i-offset] != gUnknown) and (fullGeno[i-offset] != gUnknown)):
             coexisPos.add(i-offset)
         offset += 1
-      snpLD = matchTemplateHap(cell_geno = map(coexisPos,proc(x:int): BinaryGeno =  gtMtx[j][x]), temp_geno = map(coexisPos,proc(x:int): BinaryGeno =  fullGeno[x]) )
+      snpLD = matchTemplateHap(cell_geno = map(coexisPos,proc(x:int): BinaryGeno =  gtMtx[j][x]), 
+                               temp_geno = map(coexisPos,proc(x:int): BinaryGeno =  fullGeno[x]) )
       if snpLD[1] > posterior_thresh:
         if snpLD[0] == 1.0:
           if snpiGeno == gREF: ## snpiGeno is 1 or 2 
@@ -87,44 +85,5 @@ proc inferSnpGeno(templateGeno: seq[BinaryGeno], gtMtx:seq[seq[BinaryGeno]], pos
     # echo "type00 " & $type00
     # echo "type10 " & $type10
     fullGeno[i] = inferGeno(type00, type10)  
-    if i > 20:
-      break
   return fullGeno
 
-
-
-var gtMtx:seq[seq[BinaryGeno]]  
-var currentEntry:seq[int]
-var currentEntrySeq:seq[string]
-var gtMtxFileStream:FileStream
-## i, j are 1-based
-let mtxFile = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/WC_CNV_chr1_nonzero.mtx"
-echo mtxFile
-
-try: 
-  gtMtxFileStream = openFileStream(mtx_file, fmRead)
-except:
-  stderr.write getCurrentExceptionMsg()
-# %%MatrixMarket matrix coordinate integer general
-discard gtMtxFileStream.readLine()
-#N, i,j
-currentEntrySeq = gtMtxFileStream.readLine().splitWhitespace()
-currentEntry = map(currentEntrySeq, proc(x: string): int = parseInt(x))
-var nsnps = currentEntry[0]
-echo "nsnps " & $nsnps
-var ncells = currentEntry[1]
-echo "ncells " & $ncells
-
-var totalEntries = currentEntry[2]
-
-echo "totalEntries " & $totalEntries
-
-
-gtMtx = newSeqWith(ncells,newSeq[BinaryGeno](nsnps))
-
-discard readGtMtx(gtMtxFileStream,gtMtx)
-
-var tempcell = selectTemplateCell(gtMtx = gtMtx, nPairs =3)
-var tempcellGeno = gtMtx[tempcell]
-let fullGeno = inferSnpGeno(tempcellGeno, gtMtx)
-echo fullGeno.len
diff --git a/private/sgphase.nim b/private/sgphase.nim
new file mode 100755
index 0000000000000000000000000000000000000000..6c669d6ba08acb114ce98d7cfe14781a529819ba
--- /dev/null
+++ b/private/sgphase.nim
@@ -0,0 +1,94 @@
+# phase from genotype matrices generated from sgcocaller gtMtx
+
+import find_template_cell
+import infer_missing_snps
+import utils
+import streams
+import strutils
+import sequtils
+
+# phaseOutdir/phased_snpAnnot.txt
+# phaseOutdir/cellGenoVersusTemplate.txt
+# phaseOutdir/cellGenoVersusTemplate.png by executing the R code
+# let mtxFile = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/gtMtx_chr1_gtMtx.mtx"
+# let phaseOutdir = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/phase/"
+# #let diagnosticDataframe = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/phase/cellGenoVersusTemplate.txt"
+# let snpAnnotFile = "/mnt/mcfiles/rlyu/Projects/sgcocaller/test_data/gtMtx_chr1_snpAnnot.txt"
+
+
+proc writePhasedSnpAnnot*(fullGeno:seq[BinaryGeno], snpAnnotFileStream:FileStream, phasedSnpAnnotFileStream:FileStream): int = 
+  var snpindex = 0
+  var header: string
+  var snp_rec: string
+  header = snpAnnotFileStream.readLine()
+  phasedSnpAnnotFileStream.writeLine(join([header,"Phase"],sep = "\t"))
+  while not snpAnnotFileStream.atEnd():
+    snp_rec = snpAnnotFileStream.readLine()
+    phasedSnpAnnotFileStream.writeLine(join([snp_rec,$fullGeno[snpindex]], sep  = "\t") )
+    snpindex.inc
+  if snpindex != (fullGeno.len):
+    quit "snpAnnot.txt does not have the same number of rows with gtMtx"
+  return 0 
+ 
+proc sgphase*(mtxFile: string, snpAnnotFile:string, phaseOutdir:string): int = 
+  var nsnps,ncells,totalEntries:int
+  var gtMtx:seq[seq[BinaryGeno]]  
+  var currentEntry:seq[int]
+  var currentEntrySeq:seq[string]
+  var gtMtxFileStream,ddframFileStream,snpAnnotFileStream,phasedSnpAnnotFileStream:FileStream
+  let phasedSnpAnnotFile = phaseOutdir & "phased_snpAnnot.txt"
+  let diagnosticDataframeFile = phaseOutdir & "cellGenoVersusTemplate.txt"
+  ## i, j are 1-based, entries with value 1 or 2
+  try: 
+    gtMtxFileStream = openFileStream(mtxFile, fmRead)
+    ddframFileStream = openFileStream(diagnosticDataframeFile, fmWrite)
+    snpAnnotFileStream = openFileStream(snpAnnotFile, fmRead)
+    phasedSnpAnnotFileStream = openFileStream(phasedSnpAnnotFile, fmWrite)
+  except:
+    stderr.write getCurrentExceptionMsg()
+  # %%MatrixMarket matrix coordinate integer general
+  discard gtMtxFileStream.readLine()
+  #N, i,j
+  currentEntrySeq = gtMtxFileStream.readLine().splitWhitespace()
+  currentEntry = map(currentEntrySeq, proc(x: string): int = parseInt(x))
+  nsnps = currentEntry[0]
+  echo "nsnps " & $nsnps
+  ncells = currentEntry[1]
+  echo "ncells " & $ncells
+  totalEntries = currentEntry[2]
+  echo "totalEntries " & $totalEntries
+  ## gtMtx is cell by Snp format
+  gtMtx = newSeqWith(ncells,newSeq[BinaryGeno](nsnps))
+  discard readGtMtxToSeq(gtMtxFileStream,gtMtx)
+  var tempcell = selectTemplateCell(gtMtx = gtMtx, nPairs =3)
+  echo "template cell is " & $tempcell
+  var tempcellGeno = gtMtx[tempcell]
+  let fullGeno = inferSnpGeno(tempcellGeno, gtMtx)
+  # generate the txt for diagnositc plot
+  var selectedCells:seq[int]
+  if ncells < 10:
+    selectedCells = (0..(ncells-1)).toSeq
+  else:
+    selectedCells = (0..9).toSeq
+  var headliner = "templateGeno"
+  var writeOut = ""
+  for c in selectedCells:
+    headliner = headliner & " cell" & $c
+  ddframFileStream.writeLine(headliner)
+  let subsetGtMtx = map(selectedCells, proc(x: int): seq[BinaryGeno] = gtMtx[x] )
+  for k,g in fullGeno:
+    if g == gUnknown: continue
+    else:
+      writeOut = $(parseInt($g) + 1) 
+      for cellg in sliceColumn(subsetGtMtx, k):
+        writeOut = writeOut & " " & $(parseInt($cellg)+1)
+    ddframFileStream.writeLine(writeOut)
+  
+  discard writePhasedSnpAnnot(fullGeno, snpAnnotFileStream, phasedSnpAnnotFileStream)
+  for fs in [gtMtxFileStream,ddframFileStream,snpAnnotFileStream,phasedSnpAnnotFileStream]:
+    fs.close()
+
+
+
+
+
diff --git a/private/utils.nim b/private/utils.nim
index e155e2b26500bfb4654c4997c6a0d35d3307c3e0..01370af097ed3a68c0fa66b81d510d2183e5999c 100755
--- a/private/utils.nim
+++ b/private/utils.nim
@@ -5,6 +5,7 @@ import hts
 import algorithm
 import streams
 import strutils
+import sequtils
 
 type 
   allele_expr* = ref object
@@ -70,6 +71,34 @@ proc get_base_offset*(position:int, align: Record): int =
     base_offset = qoff - over-1
     break 
   return base_offset   
+
+
+proc toBinaryGeno*(x: int, format = "12") : BinaryGeno = 
+  if(format == "01"):
+    if x == 1: return gALT
+    if x == 0: return gREF
+    if x == -1: return gUnknown
+    quit "mtx file geno is not 0,1 or -1"
+  else:
+    if x == 1: return gREF
+    if x == 2: return gALT
+    quit "mtx file geno is not 1 or 2"
+
+## cell by SNP by default
+proc readGtMtxToSeq*(mtxFileStream:FileStream, gtMtx:var seq[seq[BinaryGeno]], by_cell = false):int = 
+  var currentEntrySeq:seq[int]
+  var currentLine:seq[string]
+
+  while not mtxFileStream.atEnd():
+    currentLine = mtxFileStream.readLine().splitWhitespace()
+    ## i j 1-based from file
+    currentEntrySeq = map(currentLine, proc(x: string): int = parseInt(x))
+    if by_cell:
+      gtMtx[(currentEntrySeq[0]-1)][(currentEntrySeq[1]-1)] = currentEntrySeq[2].toBinaryGeno
+    else:
+      gtMtx[(currentEntrySeq[1]-1)][(currentEntrySeq[0]-1)] = currentEntrySeq[2].toBinaryGeno
+  return 0
+
 proc add_allele*(g:GtNode, alleleBinGeno:int):string = g.alleles & $alleleBinGeno
 
 proc inc_count_cref*(a:allele_expr) = inc(a.cref)
@@ -175,7 +204,6 @@ proc sortWriteMtx*(sMtx:var Mtx, mtx_file:string):int =
     mtxFileStream = openFileStream(mtx_file, fmWrite)
   except:
     stderr.write getCurrentExceptionMsg()
-  var current_line: string
   sMtx.lines.sort(compareMtxRow)
   mtxFileStream.writeLine(sMtx.header)
   mtxFileStream.writeLine(sMtx.dims)
diff --git a/sgcocaller.nim b/sgcocaller.nim
index f015b97d8f141f4d099d33211db3ee343a75e348..542c5baa08f9dc943e9891b119a2469d59240657 100755
--- a/sgcocaller.nim
+++ b/sgcocaller.nim
@@ -9,67 +9,65 @@ import hts
 import tables
 import sequtils
 import private/utils
-import private/fragments
 import math
 import streams
 import private/graph
 import private/findpath
-import private/blocks_utils
-import private/readfmf
-import private/phase_blocks
+import private/genotype_mtx
+import private/sgphase
 
+# proc getfmf(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outfmf:FileStream,maxTotalReads:int,
+#                   minTotalReads:int,  mapq: int,minbsq:int,mindp:int,barcodeTag:string, minsnpdepth:int):int = 
+#   var variantIndex = 1
+#   var cellFragmentsTable = newTable[string,Fragment]()
 
-proc getfmf(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outfmf:FileStream,maxTotalReads:int,
-                  minTotalReads:int,  mapq: int,minbsq:int,mindp:int,barcodeTag:string, minsnpdepth:int):int = 
-  var variantIndex = 1
-  var cellFragmentsTable = newTable[string,Fragment]()
+#   echo "generated fragments from " & $barcodeTable.len & " single cells"
+#   var cellGtNodes: Table[string, GtNode]
+#   for rec in ivcf.query("*"): 
+#     cellGtNodes = findGtNodes(rec=rec, variantIndex = variantIndex, ibam=ibam, maxTotalReads=maxTotalReads,
+#                                   minTotalReads=minTotalReads, mapq = mapq, barcodeTable = barcodeTable,
+#                                   minbsq=minbsq,barcodeTag=barcodeTag)
+#     cellGtNodes = callNodeGenotype(cellGtNodes,minCellDp = mindp, minSnpDepth = minsnpdepth, p0 = 0.3, p1 = 0.8)
+#     cellFragmentsTable = updateCellFragment(barcodedNodesTable=cellGtNodes,cellFragmentsTable = cellFragmentsTable, fmf_out = outfmf)              
+#     variantIndex = variantIndex + 1
+#   echo "processed " & $(variantIndex) & " variants"
+#   return 0
 
-  echo "generated fragments from " & $barcodeTable.len & " single cells"
-  var cellGtNodes: Table[string, GtNode]
-  for rec in ivcf.query("*"): 
-    cellGtNodes = findGtNodes(rec=rec, variantIndex = variantIndex, ibam=ibam, maxTotalReads=maxTotalReads,
-                                  minTotalReads=minTotalReads, mapq = mapq, barcodeTable = barcodeTable,
-                                  minbsq=minbsq,barcodeTag=barcodeTag)
-    cellGtNodes = callNodeGenotype(cellGtNodes,minCellDp = mindp, minSnpDepth = minsnpdepth, p0 = 0.3, p1 = 0.8)
-    cellFragmentsTable = updateCellFragment(barcodedNodesTable=cellGtNodes,cellFragmentsTable = cellFragmentsTable, fmf_out = outfmf)              
-    variantIndex = variantIndex + 1
-  echo "processed " & $(variantIndex) & " variants"
-  return 0
-proc phaseBlocks(fmf_file:string, inputVCF:string, outputVCF:string, block_hapfile:string, threads:int):int =
-  var cellGenoTable: TableRef[string,TableRef[string,int]]
-  var cellBlockLeftPhaseTable,cellBlockRightPhaseTable :TableRef[string, seq[int]]
-  var chr_blocks:seq[Block]
-  var blockkseq:seq[Block_linkage]
-  var phased_blocks: seq[int]
-  echo "Phasing for hapfile " & block_hapfile
-  cellGenoTable = newTable[string,TableRef[string,int]]()
-  discard readFmf(fmf_file,cellGenoTable)
-  chr_blocks = read_blocks(block_hapfile)
-  cellBlockLeftPhaseTable = newTable[string, seq[int]]()
-  cellBlockRightPhaseTable = newTable[string, seq[int]]()
-  ## blocks at the ends with 20 SNPs or fewer, removed?
-  ## blocks not at the ends with 20SNPs or fewer are not used for linking blocks but if their linkage with pre block and post-block is consistent, 
-  ## these short blocks' haplotypes are retained. 
-  ## 
-  ## What if blocks cannot be linked ? the number of 11(00) == 10(01)
-  echo "Total blocks " & $chr_blocks.len
+# proc phaseBlocks(fmf_file:string, inputVCF:string, outputVCF:string, block_hapfile:string, threads:int):int =
+#   var cellGenoTable: TableRef[string,TableRef[string,int]]
+#   var cellBlockLeftPhaseTable,cellBlockRightPhaseTable :TableRef[string, seq[int]]
+#   var chr_blocks:seq[Block]
+#   var blockkseq:seq[Block_linkage]
+#   var phased_blocks: seq[int]
+#   echo "Phasing for hapfile " & block_hapfile
+#   cellGenoTable = newTable[string,TableRef[string,int]]()
+#   discard readFmf(fmf_file,cellGenoTable)
+#   chr_blocks = read_blocks(block_hapfile)
+#   cellBlockLeftPhaseTable = newTable[string, seq[int]]()
+#   cellBlockRightPhaseTable = newTable[string, seq[int]]()
+#   ## blocks at the ends with 20 SNPs or fewer, removed?
+#   ## blocks not at the ends with 20SNPs or fewer are not used for linking blocks but if their linkage with pre block and post-block is consistent, 
+#   ## these short blocks' haplotypes are retained. 
+#   ## 
+#   ## What if blocks cannot be linked ? the number of 11(00) == 10(01)
+#   echo "Total blocks " & $chr_blocks.len
 
-  var contig_blocks:seq[int]
-  for b_j, bl in chr_blocks:
-    echo "block " & $b_j & " len: " & $bl.h0.len
-    # echo "last 5" & $bl.h0[(high(bl.h0)-5)..high(bl.h0)]
-    # echo "first 5" & $bl.h0[0..5]
-    discard block_phase_in_cells(cellGenoTable, bl.snpPos, bl.h0, true, cellBlockLeftPhaseTable)
-    discard block_phase_in_cells(cellGenoTable, bl.snpPos, bl.h0, false, cellBlockRightPhaseTable)
-    contig_blocks = find_contig_blocks(chr_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
-  # echo "blocks for contigs" & $contig_blocks
-  blockkseq = build_contig(contig_blocks = contig_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
-  for bk in blockkseq:
-    echo "prevBlock" &  $bk.prevBlock & "nextBlock" &  $bk.nextBlock & " 00 type " & $ $bk.linkageType_00 &  " 01 type " & $bk.linkageType_01 &  " switch posterior prob " & $bk.switch_posterior_prob  & " switch value :" & $bk.switch
-  phased_blocks = infer_non_contig_blocks(contig_blocks, blockkseq, chr_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
+#   var contig_blocks:seq[int]
+#   for b_j, bl in chr_blocks:
+#     echo "block " & $b_j & " len: " & $bl.h0.len
+#     # echo "last 5" & $bl.h0[(high(bl.h0)-5)..high(bl.h0)]
+#     # echo "first 5" & $bl.h0[0..5]
+#     discard block_phase_in_cells(cellGenoTable, bl.snpPos, bl.h0, true, cellBlockLeftPhaseTable)
+#     discard block_phase_in_cells(cellGenoTable, bl.snpPos, bl.h0, false, cellBlockRightPhaseTable)
+#     contig_blocks = find_contig_blocks(chr_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
+#   # echo "blocks for contigs" & $contig_blocks
+#   blockkseq = build_contig(contig_blocks = contig_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
+#   for bk in blockkseq:
+#     echo "prevBlock" &  $bk.prevBlock & "nextBlock" &  $bk.nextBlock & " 00 type " & $ $bk.linkageType_00 &  " 01 type " & $bk.linkageType_01 &  " switch posterior prob " & $bk.switch_posterior_prob  & " switch value :" & $bk.switch
+#   phased_blocks = infer_non_contig_blocks(contig_blocks, blockkseq, chr_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
 
-  discard  write_linked_blocks(inputVCF,outputVCF, blocks = chr_blocks, blocks_phase = phased_blocks, threads= threads)
-  return 0
+#   discard  write_linked_blocks(inputVCF,outputVCF, blocks = chr_blocks, blocks_phase = phased_blocks, threads= threads)
+#   return 0
 proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:TableRef,
                 ibam:Bam, out_dir:string, mapq:int, 
                 minbsq:int, mintotal:int, maxtotal:int, mindp:int, maxdp:int, 
@@ -179,11 +177,14 @@ when(isMainModule):
   $version
 
   Usage:
-      sgcocaller fmf [options] <BAM> <VCF> <barcodeFile> <out_prefix>
+      sgcocaller gtMtx [options] <BAM> <VCF> <barcodeFile> <out_prefix>
+      sgcocaller phase [options] <gtMtxFile> <phaseOutputDir>
+      sgcocaller swphase [options] <gtMtxFile> <phaseSnpAnnotFile>
+      sgcocaller sxo [options] <phaseOutputDir> <out_prefix>
       sgcocaller xo [options] <BAM> <VCF> <barcodeFile> <out_prefix>
-      sgcocaller phase_blocks [options] <VCF> <fmf> <hapfile> <out_vcf>
       
 
+
 Arguments:
 
   <BAM> the read alignment file with records of single-cell DNA reads
@@ -218,9 +219,10 @@ Options:
 
 
   Examples
-      ./sgcocaller fmf --threads 4 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/cellfragment.fmf
+      ./sgcocaller gtMtx --threads 4 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/cellfragment.fmf
+      ./sgcocaller phase gtMtxFile phaseOutputDir
       ./sgcocaller xo --threads 4 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp
-      ./sgcocaller phase_blocks --threads 4 AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz ./percell/cellfragment.fmf ./percell/ccsnp/linkedBlocks.vcf.gz
+      ./sgcocaller sxo phaseOutputDir barcodeFile.tsv ./percell/ccsnp
 
 """ % ["version", version])
 
@@ -241,7 +243,7 @@ Options:
     cmPmb:float
     barcodeTag="CB"
     minsnpdepth:int
-    out_vcf,fmf,barcodeFile,bamfile,vcff,hapfile:string
+    barcodeFile,bamfile,vcff:string
     vcfGtPhased = false
   echo $args
   threads = parse_int($args["--threads"])
@@ -258,7 +260,7 @@ Options:
   cmPmb = parse_float($args["--cmPmb"])
   vcfGtPhased = parse_bool($args["--phased"])
   
-  if args["fmf"] or args["xo"]:
+  if args["gtMtx"] or args["xo"]:
     var 
       ibam:Bam
       ivcf:VCF
@@ -266,13 +268,12 @@ Options:
     bamfile = $args["<BAM>"]
     barcodeFile = $args["<barcodeFile>"]
     out_dir = $args["<out_prefix>"]
+    if (out_dir[^1] != '_') and (out_dir[^1] != '/') : out_dir = out_dir & "_"
     vcff = $args["<VCF>"]
-
     if not open(ibam, bamfile, threads=threads, index = true):
         quit "couldn't open input bam"
     if not open(ivcf, vcff, threads=threads):
         quit "couldn't open: vcf file"
-
     if($args["--chrom"] != "nil"):
       selectedChrs = $args["--chrom"]
       s_Chrs = selectedChrs.split(',')
@@ -280,9 +281,7 @@ Options:
       ## find all chroms from headers of vcf file (Contigs)
       let contigs = ivcf.contigs 
       s_Chrs = map(contigs, proc(x: Contig): string = x.name)
-
     var hf = hts.hts_open(cstring(barcodeFile), "r")
-    #### TODO : Table size
     var barcodeTable =  newTable[string,int](initialSize = 1024)
     var kstr: hts.kstring_t
     kstr.l = 0
@@ -291,30 +290,43 @@ Options:
     var ithSperm = 0
     ## initiate the table with CB as keys, allele counts (ref object) as elements
     while hts_getline(hf, cint(10), addr kstr) > 0:
-      if $kstr.s[0] == "#":
-        continue
+      if $kstr.s[0] == "#": continue
       var v = $kstr.s
       discard barcodeTable.hasKeyOrPut(v, ithSperm)
       ithSperm.inc
     discard hf.hts_close()
-    if args["fmf"]:
-      echo "generating fragment file to " & out_dir
-      var outfmf:FileStream
-      try: 
-        outfmf = openFileStream(out_dir, fmWrite)
-      except:
-        stderr.write getCurrentExceptionMsg()
-      discard getfmf(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outfmf = outfmf, maxTotalReads = maxtotal,
-                    minTotalReads = mintotal, mapq = mapq, minbsq =minbsq, mindp = mindp, barcodeTag = barcodeTag,minsnpdepth=minsnpdepth)
-      close(outfmf)
-    if args["xo"]:
-      echo "running crossover calling \n"
-      echo "running for these chromosomes as specified in the header of the provided VCF file " & $s_Chrs
+    if args["gtMtx"]:
+      var outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile : string
+      var outGtMtx, outSnpAnnot, outTotalCountMtx, outAltCountMtx:FileStream
+      for chrom in s_Chrs:
+        echo "generating genotype sparse matrix file to " & out_dir & "for chr " & chrom
+        outGtMtxFile = out_dir & chrom & "_gtMtx.mtx"
+        outTotalCountMtxFile = out_dir & chrom &  "_totalMtx.mtx"
+        outAltCountMtxFile = out_dir & chrom &  "_altMtx.mtx"
+        try: 
+          outGtMtx = openFileStream(outGtMtxFile, fmWrite)
+          outSnpAnnot = openFileStream(out_dir & chrom &  "_snpAnnot.txt", fmWrite)
+          outTotalCountMtx = openFileStream(outTotalCountMtxFile, fmWrite)
+          outAltCountMtx = openFileStream(outAltCountMtxFile, fmWrite)
+        except:
+          stderr.write getCurrentExceptionMsg()
+
+        discard getGtMtx(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outGtMtx = outGtMtx, 
+                         outTotalCountMtx = outTotalCountMtx ,outAltCountMtx = outAltCountMtx,
+                         outSnpAnnot = outSnpAnnot, maxTotalReads = maxtotal,
+                         minTotalReads = mintotal, mapq = mapq, minbsq = minbsq, minCellDp = mindp,maxCellDp =maxdp,
+                         barcodeTag = barcodeTag, minsnpdepth=minsnpdepth)                    
+        for mtxFile in [outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile]:
+          var imtx = readMtx(mtx_file = mtxFile)
+          discard sortWriteMtx(imtx, mtx_file = mtxFile)      
+    elif args["xo"]:
+      echo "running crossover calling from VCF and BAM file\n"
+      echo "running for these chromosomes as specified in the header of the provided VCF file for chroms: " & $s_Chrs
       discard sgcocaller(threads, ivcf, barcodeTable, ibam,
                       out_dir, mapq, minbsq, mintotal, 
                       maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb, s_Chrs,barcodeTag,phased = vcfGtPhased)
       var outFileTotalCountMtxFile, outFileAltCountMtxFile: string
-      ## sort entries in mtx files 
+        ## sort entries in mtx files 
       for chrom in s_Chrs:
         outFileTotalCountMtxFile = out_dir & chrom & "_totalCount.mtx"
         outFileAltCountMtxFile = out_dir & chrom & "_altCount.mtx"
@@ -323,13 +335,20 @@ Options:
           discard sortWriteMtx(imtx, mtx_file = mtxFile)
     ibam.close()
     ivcf.close()
-  if args["phase_blocks"]:
-    fmf = $args["<fmf>"]
-    out_vcf = $args["<out_vcf>"]
-    vcff = $args["<VCF>"]
-    hapfile =  $args["<hapfile>"]
-    echo "running phasing blocks \n"
-    discard phaseBlocks(fmf_file = fmf, inputVCF=vcff, outputVCF=out_vcf, block_hapfile=hapfile, threads=threads)
+  elif args["phase"]:
+    let gtMtxFile = $args["<gtMtxFile>"]
+    var phaseOutDir = $args["<phaseOutputDir>"]
+    if (phaseOutdir[^1] != '_') and (phaseOutdir[^1] != '/'): phaseOutdir = phaseOutdir & "_"
+    let snpAnnotFile = gtMtxFile.replace(sub = "_gtMtx.mtx", by = "_snpAnnot.txt")
+    echo "running phasing from single cell genotype matrix (one chromosome) " & gtMtxFile
+    echo "using the " & snpAnnotFile & "for generating phased haplotypes"
+    discard sgphase(mtxFile = gtMtxFile, snpAnnotFile = snpAnnotFile, phaseOutdir = phaseOutdir)
+  elif args["swphase"]:
+    let gtMtxFile = $args["<gtMtxFile>"]
+    let phaseSnpAnnotFile = $args["<phaseSnpAnnotFile>"]
+    echo "finding snps positions to switch from single cell genotype matrix (one chromosome) " & gtMtxFile
+    echo "using the " & phaseSnpAnnotFile & "for generating final phased haplotypes"
+    discard sgphase(mtxFile = gtMtxFile, snpAnnotFile = snpAnnotFile, phaseOutdir = phaseOutdir)
 
 
  
\ No newline at end of file