diff --git a/private/blocks_utils.nim b/private/blocks_utils.nim
new file mode 100755
index 0000000000000000000000000000000000000000..6c3dbc164dc7c4b09ca96e202f8c9ceaae58c8de
--- /dev/null
+++ b/private/blocks_utils.nim
@@ -0,0 +1,150 @@
+import streams
+import strutils
+import math
+
+let block_sep = "********"
+let split_threshold = 30.0
+let mis_threshold = 30.0
+#let version = "v0.3.3"
+
+type
+  Block* = ref object
+    # the positions of SNP (index position from provided VCF)
+    snpPos*: seq[int]
+    # number of SNPs it spans
+    length*: int
+    # number of SNPs phased
+    phased*: int 
+    # number of base pairs it spans
+    # span 
+    span*: int
+    # number of fragments
+    fragments*: int
+    # haplotype type 0, its bit-wise complementary is h1
+    h0*: seq[int] 
+    switch_error*: seq[float]
+    mismatch_error*: seq[float]
+    # 1 switch, 0 no switch comparing to the prev block, -1 means unlinked
+    switch: int
+    # int frags;
+    # float SCORE, bestSCORE, lastSCORE;
+    # int* slist; // ordered list of variants in this connected component
+    # int lastvar; // index of the first and last variants in this connected component
+    # int iters_since_improvement;
+
+
+type Block_linkage* = ref object
+  prevBlock*: int
+  nextBlock*: int
+  linkageType_00* :int
+  linkageType_01*: int
+  switch_posterior_prob*: float
+  switch_confidence*: float
+  switch*: int
+
+
+proc cal_posterior*(error_rate = 0.1, n_snps:int, n_mis:int): float = 
+  var ll_h0 = (1-error_rate)^(n_snps-n_mis)*error_rate^n_mis
+  var ll_h1 = (1-error_rate)^(n_mis)*error_rate^(n_snps-n_mis)
+  var new_p = max(ll_h0/(ll_h0+ll_h1),ll_h1/(ll_h0+ll_h1))
+  return new_p
+
+proc cal_switch_posterior*(error_rate = 0.1, n_blocks:int, n_sw:int): float = 
+  var ll_01 = error_rate^(n_blocks-n_sw)*(1-error_rate)^n_sw
+  var ll_00 = error_rate^(n_sw)*(1-error_rate)^(n_blocks-n_sw)
+  return ll_01/(ll_01+ll_00)
+proc is_block_header(line_string:string): bool = 
+  if line_string.splitWhitespace()[0] == "BLOCK:":
+  # .len == 11:
+    return true
+  else:
+    return false
+
+## to honor the current format of HAPCUT2 the long blocks are not splitted
+## We look at the 10th column: switch error score to split the blocks
+proc count_blocks(block_file:string, by_header = false): int =
+  var blocks = 1
+  var bfs: FileStream
+  var switch_error: float
+  try: 
+    bfs = openFileStream(block_file, fmRead)
+  except:
+    stderr.write getCurrentExceptionMsg()
+  var current_line: string
+  while not bfs.atEnd():
+    current_line = bfs.readLine()
+    if current_line.splitWhitespace()[0] == block_sep:
+      continue
+    if by_header:
+      if current_line.is_block_header:
+        blocks.inc
+      continue
+    else:
+      if current_line.is_block_header:
+        continue
+      switch_error = parseFloat(current_line.splitWhitespace()[9])
+      if switch_error < split_threshold :
+        blocks.inc
+  bfs.close()
+  if by_header:
+    return blocks - 1
+  else:
+    return blocks
+  
+proc add_record(line_record: string, current_block:Block):int = 
+  var line_record_seq = line_record.splitWhitespace()
+  let snpPos = parseInt(line_record_seq[0])
+  let switch_error = parseFloat(line_record_seq[9])
+  let mismatch_error = parseFloat(line_record_seq[10])
+  var h0: int
+  try:
+    h0 =  parseInt(line_record_seq[1])
+  except:
+    h0 = -1
+#  let discrete_prune = parseInt(line_record_seq[8])
+  current_block.snpPos.add(snpPos)
+  if mismatch_error < mis_threshold:
+    h0 = -1
+  current_block.h0.add(h0)
+  current_block.switch_error.add(switch_error)
+  current_block.mismatch_error.add(mismatch_error)
+
+proc read_blocks*(block_file:string,  by_header = false):seq[Block] = 
+  # start with block 0  
+  var b_j = 0
+  var bfs: FileStream
+  var current_line: string
+  var blockSeq = newSeq[Block]()
+  var line_index = 0
+  var switch_error: float
+  try: 
+    bfs = openFileStream(block_file, fmRead)
+  except:
+    stderr.write getCurrentExceptionMsg()
+  ## First line in block file, not used, a block header line
+  current_line = bfs.readLine()
+  var currentBlock = Block()
+  while not bfs.atEnd():
+    current_line = bfs.readLine()
+    if current_line.splitWhitespace()[0] == block_sep:
+      continue
+    if by_header:
+      if current_line.is_block_header:
+        blockSeq.add(currentBlock)
+        currentBlock = Block()
+        continue
+      else:
+        ## add current line of record to this block
+        discard add_record(current_line,currentBlock)
+    else:
+      if current_line.is_block_header:
+        continue
+      else:
+        switch_error = parseFloat(current_line.splitWhitespace()[9])
+        if switch_error < split_threshold :
+          blockSeq.add(currentBlock)
+          currentBlock = Block()
+        discard add_record(current_line,currentBlock)
+  blockSeq.add(currentBlock)
+  bfs.close()
+  return blockSeq
diff --git a/private/fragments.nim b/private/fragments.nim
index a6f24648708044b93c53eb142b8ef362a2aa014b..7eac2de7339221de0f06166ba72033ebf76e946d 100755
--- a/private/fragments.nim
+++ b/private/fragments.nim
@@ -6,7 +6,7 @@ import streams
 # minSnpDepth, this SNP should at least be covered by two cells
 
 proc findGtNodes*(rec:Variant, variantIndex: int, ibam:Bam, maxTotalReads:int, minTotalReads:int, mapq: int,
-                  barcodeTable:OrderedTableRef,
+                  barcodeTable:TableRef,
                   minbsq:int,
                   barcodeTag:string, minCellDp:int, minSnpDepth:int ): Table[string,GtNode] =
 
diff --git a/private/graph.nim b/private/graph.nim
index bea952e6034c7f93a38a3624ef84c4788d6fd367..fe029802c9bf0445bebeb41d7bf0d1a0ac891988 100755
--- a/private/graph.nim
+++ b/private/graph.nim
@@ -16,7 +16,7 @@ type
   SeqSpermViNodes* = seq[SpermViNodes]
 
 
-proc addViNode*(barcodeTable: OrderedTableRef, 
+proc addViNode*(barcodeTable: TableRef, 
                alleleCountTable: Table[string,allele_expr],
                scSpermSeq: var SeqSpermViNodes,
                outFileTotalCountMtx: var FileStream,
@@ -27,7 +27,7 @@ proc addViNode*(barcodeTable: OrderedTableRef,
                snpIndex: int,
                thetaRef: float,
                thetaAlt: float,
-               rec: Variant,
+               rec_pos: int,
                initProb: array,
                cmPmb: float): int = 
   for bc, ac in alleleCountTable.pairs:
@@ -47,7 +47,7 @@ proc addViNode*(barcodeTable: OrderedTableRef,
       currentViNode.pathState[stateRef] = stateN
       currentViNode.pathState[stateAlt] = stateN
       currentViNode.state = stateN
-      currentViNode.pos = int(rec.POS)
+      currentViNode.pos = int(rec_pos)
       currentViNode.cAlt = int(ac.cAlt)
       currentViNode.cRef = int(ac.cRef)
       
@@ -56,8 +56,8 @@ proc addViNode*(barcodeTable: OrderedTableRef,
       scSpermSeq[ithSperm].spermSnpIndexLookUp[scSpermSeq[ithSperm].viNodeseq.len] = snpIndex
     else:
       let preVNode = scSpermSeq[ithSperm].viNodeseq[high(scSpermSeq[ithSperm].viNodeseq)]
-      var ltransProb = math.ln(getTrans(preVNode.pos,int(rec.POS),cmPmb=cmPmb))
-      var lnoTransProb = math.ln(1-getTrans(preVNode.pos,int(rec.POS),cmPmb=cmPmb))
+      var ltransProb = math.ln(getTrans(preVNode.pos,int(rec_pos),cmPmb=cmPmb))
+      var lnoTransProb = math.ln(1-getTrans(preVNode.pos,int(rec_pos),cmPmb=cmPmb))
       var currentViNode = ViNode()
         # ref/alt -> ref
       var refTref = preVNode.pathScore[stateRef] + lnoTransProb
@@ -82,7 +82,7 @@ proc addViNode*(barcodeTable: OrderedTableRef,
       currentViNode.pathScore[stateRef] += emissionArray[stateRef]
       currentViNode.cAlt = int(ac.cAlt)
       currentViNode.cRef = int(ac.cRef)
-      currentViNode.pos = int(rec.POS)
+      currentViNode.pos = int(rec_pos)
       scSpermSeq[ithSperm].viNodeseq.add(currentViNode)
       scSpermSeq[ithSperm].snpIndexLookUp[snpIndex] = scSpermSeq[ithSperm].viNodeseq.len
       scSpermSeq[ithSperm].spermSnpIndexLookUp[scSpermSeq[ithSperm].viNodeseq.len] = snpIndex
diff --git a/private/phase_blocks.nim b/private/phase_blocks.nim
new file mode 100755
index 0000000000000000000000000000000000000000..9f2b2db9f74207839c30609bdde79874fe5e1c0b
--- /dev/null
+++ b/private/phase_blocks.nim
@@ -0,0 +1,263 @@
+# phase blocks from cell fmf genotypes
+
+import blocks_utils
+#import readfmf
+import tables
+import math
+import sequtils
+import hts
+
+let n_anchor_snps = 9
+let posterior_threshold = 0.99
+let confidence_threshold = 0.95
+let debug = false
+
+proc match_phase(cell_geno:seq[int], block_h0:seq[int]): int =
+  if cell_geno.len != block_h0.len:
+    quit "the haplotypes lengths should be equal" & " " & $cell_geno.len & " " & $cell_geno.len
+  var diff = map(toSeq(0..high(cell_geno)), proc (x:int):int = abs(cell_geno[x]-block_h0[x]))
+  #echo "matching " & $cell_geno & " to " & $block_h0 
+  var posterior_phase = cal_posterior(n_snps = cell_geno.len, n_mis = sum(diff))
+  ## unphased 
+  if cell_geno.len < n_anchor_snps:
+    if debug: echo "returned -1 because too few SNPs in the block covered by this cell. " & $diff.len
+    return -1
+  if float(sum(diff)) > (n_anchor_snps+1)/2:
+    if posterior_phase > posterior_threshold:
+      if debug: echo "returned 1 with diffs to h0 : " & $sum(diff) & " of " & $diff.len & ": posterior probs = " & $posterior_phase
+      return 1
+    else:
+      if debug: echo "returned -1 because posterior prob is not high enough with diffs to h0 : " & $sum(diff) & " of " & $diff.len & ": posterior probs = " & $posterior_phase
+      return -1
+  else:
+    if posterior_phase > posterior_threshold:
+      if debug: echo "returned 0 with diffs to h0 : " & $sum(diff) & " of " & $diff.len & ": posterior probs = " & $posterior_phase
+      return 0
+    else:
+      if debug: echo "returned -1 because posterior prob is not high enough with diffs to h0 : " & $sum(diff) & " of " & $diff.len & ": posterior probs = " & $posterior_phase
+      return -1 
+
+proc block_phase_in_cells*(cellGenoTable:TableRef, block_pos: seq[int], block_h0: seq[int], left:bool, cellBlockPhaseTable: TableRef[string, seq[int]]): int =
+  var n_covered_snps = 0
+  var covered_snps_cell_geno: seq[int]
+  var covered_snps_block_geno: seq[int]
+  var block_phase = 0
+  for barcode in cellGenoTable.keys():
+    n_covered_snps = 0
+    covered_snps_cell_geno = newSeq[int]()
+    covered_snps_block_geno = newSeq[int]()
+    if left:
+      if debug: echo "Phasing left of the block for cell "  & $barcode  
+      for pos_i in 0..high(block_pos):
+        if cellGenoTable[barcode].hasKey($block_pos[pos_i]) and block_h0[pos_i] != -1:
+          #echo "using " & $block_pos[pos_i] & "for cell " & barcode
+          n_covered_snps.inc
+          covered_snps_cell_geno.add(cellGenoTable[barcode][$block_pos[pos_i]])
+          covered_snps_block_geno.add(block_h0[pos_i])
+        if n_covered_snps > n_anchor_snps:
+          break
+    else: 
+      if debug: echo "Phasing right of the block for cell "  & $barcode  
+      #  high(block_pos)..0
+      for pos_i in countdown((block_pos.len - 1),0):
+        if cellGenoTable[barcode].hasKey($block_pos[pos_i]) and block_h0[pos_i] != -1:
+          #echo "using " & $block_pos[pos_i] & "for cell " & barcode
+          n_covered_snps.inc
+          covered_snps_cell_geno.add(cellGenoTable[barcode][$block_pos[pos_i]])
+          covered_snps_block_geno.add(block_h0[pos_i])
+        if n_covered_snps > n_anchor_snps:
+          break   
+    block_phase = match_phase(covered_snps_cell_geno,covered_snps_block_geno)
+    if not cellBlockPhaseTable.hasKey(barcode):
+      cellBlockPhaseTable[barcode] = newSeq[int]()
+    cellBlockPhaseTable[barcode].add(block_phase)     
+proc find_contig_blocks*(blocks:seq[Block],cellBlockLeftPhaseTable: TableRef[string, seq[int]],cellBlockRightPhaseTable: TableRef[string, seq[int]] ):seq[int] = 
+  var contig_blocks = newSeq[int]()
+  var count_missing =  newSeqWith[blocks.len,0]
+  var contig_block_allow_missing = int(ceil(float(cellBlockLeftPhaseTable.len) * 0.1))
+  # for b in 0..high(blocks):
+  var left_right_phase: seq[tuple[left:int,right:int]]
+  for bc in cellBlockLeftPhaseTable.keys():
+    left_right_phase = zip(cellBlockLeftPhaseTable[bc],cellBlockRightPhaseTable[bc])
+    if debug: echo bc & "\t" & $left_right_phase
+    for bl in 0..high(left_right_phase):
+      if left_right_phase[bl][0] == -1 or left_right_phase[bl][1] == -1 :
+        count_missing[bl].inc
+  if debug: echo "block missing in X number of cells:" & $count_missing
+  for i, c in count_missing:
+    if c <= contig_block_allow_missing: contig_blocks.add(i)
+  return contig_blocks
+
+## find linkage of two blocks
+## 
+proc find_linkage*(block1: int, block2: int, cellBlockLeftPhaseTable: TableRef[string, seq[int]],cellBlockRightPhaseTable: TableRef[string, seq[int]]): Block_linkage= 
+  if debug: echo "linking " & $block1 & "and" & $block2
+  var left_right_phase: seq[tuple[left:int,right:int]]
+  var linkString: string
+  if not (block1 < block2):
+    quit "block1 needs to be left of block2"
+  var blockLinkage = Block_linkage(prevBlock :block1, nextBlock: block2, switch: -1)
+  for bc in cellBlockLeftPhaseTable.keys():
+    left_right_phase = zip(cellBlockLeftPhaseTable[bc],cellBlockRightPhaseTable[bc])
+    if debug: echo $left_right_phase[block1].right & $left_right_phase[block2].left
+    linkString = $left_right_phase[block1].right & $left_right_phase[block2].left
+    if linkString in @["00","11"]:
+      blockLinkage.linkageType_00.inc
+    elif linkString in @["01","10"]:
+      blockLinkage.linkageType_01.inc
+  blockLinkage.switch_posterior_prob = cal_switch_posterior(error_rate = 0.1, (blockLinkage.linkageType_00 + blockLinkage.linkageType_01), blockLinkage.linkageType_01)
+  blockLinkage.switch_confidence =  log10(blockLinkage.switch_posterior_prob) - log10(1-blockLinkage.switch_posterior_prob)
+  return blockLinkage
+  
+# contig_blocks, the blocks to build contigs. Rest of the blocks are filled in if can
+proc build_contig*(contig_blocks:seq[int], cellBlockLeftPhaseTable: TableRef[string, seq[int]],cellBlockRightPhaseTable: TableRef[string, seq[int]] ):seq[Block_linkage] = 
+  var blockLinkageSeq: seq[Block_linkage]
+  var blockLinkage:Block_linkage
+  for b in 0..(contig_blocks.len-2):
+    blockLinkage = find_linkage(block1=contig_blocks[b], block2=contig_blocks[b+1],cellBlockLeftPhaseTable,cellBlockRightPhaseTable)
+    if blockLinkage.switch_confidence < 0.0:
+      blockLinkage.switch = 0
+    else: 
+      blockLinkage.switch = 1
+    blockLinkageSeq.add(blockLinkage)
+  return blockLinkageSeq
+
+proc infer_block_linkage_to_one_contig_block*(contig_block:int,to_infer_block:int,  
+                                             cellBlockLeftPhaseTable: TableRef[string, seq[int]],
+                                             cellBlockRightPhaseTable: TableRef[string, seq[int]],
+                                             at_left: bool):int = 
+  if at_left:
+    if debug: echo "linking " & $to_infer_block & "to contig block " & $contig_block & " from left"
+    var blockLinkage = Block_linkage(prevBlock : to_infer_block, nextBlock :contig_block, switch : -1) 
+    blockLinkage = find_linkage(block1 = to_infer_block, block2 = contig_block, cellBlockLeftPhaseTable,cellBlockRightPhaseTable)
+    if abs(blockLinkage.switch_confidence) < confidence_threshold:
+      return -1
+    elif blockLinkage.switch_confidence > 0:
+      return 1
+    else:
+      return 0
+  else:
+    if debug: echo "linking " & $to_infer_block & "to contig block " & $contig_block & " from right"
+    var blockLinkage = Block_linkage(prevBlock: contig_block, nextBlock : to_infer_block , switch: -1) 
+    blockLinkage = find_linkage(block1 = contig_block, block2 = to_infer_block, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
+    if abs(blockLinkage.switch_confidence) < confidence_threshold:
+      return -1
+    elif blockLinkage.switch_confidence > 0:
+      return 1
+    else:
+      return 0
+
+proc infer_non_contig_blocks*(contig_blocks:seq[int], linkedContigs:seq[Block_linkage], blocks:seq[Block],cellBlockLeftPhaseTable: TableRef[string, seq[int]],cellBlockRightPhaseTable: TableRef[string, seq[int]]):seq[int] = 
+  ## time to gather the non_contig forming blocks and fill their phase:
+  ## Anchor using the first block in the contig blocks
+  var hap_sw = 0
+  var hap_sw_left = 0
+  var hap_sw_right = 0
+  
+  var blocks_phase = newSeqWith(blocks.len,-1)
+  var left_contig_block:int
+  var right_contig_block: int 
+  blocks_phase[contig_blocks[0]] = 0
+  for lc in linkedContigs:
+    if lc.switch == 0:
+      if debug: echo "blocks_phase[lc.nextBlock] " & $blocks_phase[lc.nextBlock]
+      
+      blocks_phase[lc.nextBlock] = blocks_phase[lc.prevBlock] 
+    else:
+      blocks_phase[lc.nextBlock] = (1 xor blocks_phase[lc.prevBlock])  
+  if debug: echo "block contig haplotype : " & $blocks_phase
+  for b_j in 0..(blocks.len-1):
+    if not (b_j in contig_blocks):
+      if b_j < contig_blocks[0]:
+        hap_sw = infer_block_linkage_to_one_contig_block(contig_block = contig_blocks[0], to_infer_block = b_j, cellBlockLeftPhaseTable,cellBlockRightPhaseTable, at_left=true)
+        if hap_sw == 1:
+          blocks_phase[b_j] = (1 xor contig_blocks[0]) 
+        elif hap_sw == 0:
+          blocks_phase[b_j] = contig_blocks[0] 
+      elif b_j > contig_blocks[contig_blocks.len-1]:
+        hap_sw = infer_block_linkage_to_one_contig_block(contig_block = contig_blocks[contig_blocks.len-1], to_infer_block = b_j,cellBlockLeftPhaseTable,cellBlockRightPhaseTable,at_left=false)
+        if hap_sw == 1:
+          blocks_phase[b_j] = (1 xor contig_blocks[0]) 
+        elif hap_sw == 0:
+          blocks_phase[b_j] = contig_blocks[0] 
+      else:
+        ## in between two contig blocks
+        left_contig_block = b_j
+        while not (left_contig_block in contig_blocks):
+          left_contig_block = left_contig_block - 1
+        right_contig_block = b_j
+        while not (right_contig_block in contig_blocks):
+          right_contig_block = right_contig_block+1
+        hap_sw_left = infer_block_linkage_to_one_contig_block(contig_block = left_contig_block, to_infer_block = b_j, cellBlockLeftPhaseTable,cellBlockRightPhaseTable, at_left=false)
+        hap_sw_right = infer_block_linkage_to_one_contig_block(contig_block = right_contig_block, to_infer_block = b_j, cellBlockLeftPhaseTable,cellBlockRightPhaseTable, at_left=true)
+        if blocks_phase[left_contig_block] != blocks_phase[right_contig_block]:
+          if hap_sw_right == -1:
+            if hap_sw_left == 1:
+              blocks_phase[b_j] = blocks_phase[right_contig_block]
+            elif hap_sw_left == 0:
+              blocks_phase[b_j] = blocks_phase[left_contig_block]
+          elif hap_sw_right == 0 and (hap_sw_left == 1 or hap_sw_left == -1):
+              blocks_phase[b_j] = blocks_phase[right_contig_block]
+          elif hap_sw_right == 1 and (hap_sw_left == 0 or hap_sw_left == -1):
+              blocks_phase[b_j] = blocks_phase[left_contig_block]
+    else: continue 
+  return blocks_phase 
+
+proc write_linked_blocks*(vcfFile:string, outFile: string, blocks:seq[Block], blocks_phase:seq[int], threads:int):int = 
+  var ivcf: VCF
+  var ovcf: VCF
+  var v_off = 0
+  var current_block = 0
+  if not open(ivcf, vcfFile, threads=threads):
+      quit "couldn't open input vcf file"
+  if not open(ovcf, outFile, threads=threads, mode ="w"):
+    quit "couldn't open output vcf file"
+  ovcf.header = ivcf.header
+  discard ovcf.header.add_string("""##sgcocaller_v0.1=phaseBlocks""")
+  discard ovcf.write_header()
+  var gt_string:seq[int32]
+  var block_pos_i = 0
+  for v in ivcf.query("*"):
+    v_off.inc
+    if blocks_phase[current_block] == -1:
+      current_block.inc
+      if current_block == blocks.len:
+        break
+      block_pos_i = 0
+      continue
+    if v_off != blocks[current_block].snpPos[block_pos_i]:
+      continue
+    else: ## equal 
+      var f = v.format()
+      if blocks_phase[current_block] == 0:
+        # don't need to flip
+        if blocks[current_block].h0[block_pos_i] == 0:
+          ## 0|1
+          if debug: echo "block hap0 and this h0 is 0 0|1"
+          gt_string = @[int32(2),int32(5)]
+        else:
+          ## 1|0
+          if debug: echo "block hap0 and this h0 is 1 1|0"
+          gt_string = @[int32(4),int32(3)]
+      elif blocks_phase[current_block] == 1:
+        if blocks[current_block].h0[block_pos_i] == 0:
+          ## 1|0
+          if debug: echo "block hap1 and this h0 is 0 1|0"
+          gt_string = @[int32(4),int32(3)]
+        else:
+          ## 0|1
+          if debug: echo "block hap1 and this h0 is 1 0|1"
+          gt_string = @[int32(2),int32(5)]
+      if f.set("GT",gt_string) != Status.OK:
+        quit "set GT failed"
+      if not ovcf.write_variant(v) :
+        quit "write vcf failed for " & $voff
+      block_pos_i.inc
+    if block_pos_i == blocks[current_block].snpPos.len:
+      current_block.inc
+      if current_block == blocks.len:
+        break
+      block_pos_i = 0
+  ovcf.close()
+  ivcf.close()
+
diff --git a/private/readfmf.nim b/private/readfmf.nim
new file mode 100755
index 0000000000000000000000000000000000000000..6a4e50c707e57343dd3719640a2f6a86b4678c05
--- /dev/null
+++ b/private/readfmf.nim
@@ -0,0 +1,43 @@
+## read fmf 
+## barcode -> snpPos: geno
+import tables
+import streams
+import strutils
+
+proc readFmf*(frag_file:string,cellGenoTable:TableRef):int = 
+  var fmf: FileStream
+  var n_blocks,line_index,b_pos : int 
+  var current_frag, barcode, b_geno: string
+  var current_frag_seq: seq[string]
+  try: 
+      fmf = openFileStream(frag_file, fmRead)
+  except:
+      stderr.write getCurrentExceptionMsg()
+  while not fmf.atEnd():
+    current_frag = fmf.readLine()
+    current_frag_seq = current_frag.splitWhitespace()
+    n_blocks = parseInt(current_frag_seq[0])
+    barcode = current_frag_seq[1].split('.')[0]
+    for b in 1..n_blocks:
+      b_pos = parseInt(current_frag_seq[(b)*2])
+      b_geno = current_frag_seq[(b)*2+1]
+      for i, g in b_geno:
+        if cellGenoTable.hasKey(barcode):
+          discard cellGenoTable[barcode].hasKeyOrPut($(b_pos+i), parseInt($g))
+        else:
+          var geno_table = newTable[string,int]()
+          geno_table[$(b_pos+i)] = parseInt($g)
+          cellGenoTable[barcode] = geno_table
+    line_index.inc
+  echo "total fmfs " & $line_index
+  fmf.close()
+
+
+# discard readFmf("data/WC_CNV_42/fmf_snpdepth1/WC_CNV_42.chr16.fmf",cellGenoTable)
+# for k in cellGenoTable.keys():
+#   echo k
+#   break
+# echo cellGenoTable.len
+
+# for key,value in cellGenoTable["TGTATTCAGGACAGCT-1"].pairs():
+#   echo $key & "\t" & $value
diff --git a/private/utils.nim b/private/utils.nim
index be82eb0eff2c9b78d5b45c386a42cd2b813474a7..a4843a15059d469023ab808eced13f6a2e63adaa 100755
--- a/private/utils.nim
+++ b/private/utils.nim
@@ -26,7 +26,7 @@ type
     counter*: int
     node1*: GtNode
     node2*: GtNode
-proc get_base_offset*(position:cint, align: Record): int =
+proc get_base_offset*(position:int, align: Record): int =
   var 
     off = align.start.int
     qoff = 0
@@ -66,17 +66,18 @@ proc getTrans*(pos1:int64,pos2:int64,cmPmb=0.1): float =
   var rec = 1-math.exp(-float(pos2-pos1)*1e-8*cmPmb) # for autosomes 1*cmPbm cM per Mb 
   return rec
                    #   
-proc countAllele*(rec:Variant, ibam:Bam, maxTotalReads:int,
+proc countAllele*(ibam:Bam, maxTotalReads:int,
                   minTotalReads:int,
                   chrom:string, mapq: int,
-                  barcodeTable:OrderedTableRef,minbsq:int,bulkBam:bool,barcodeTag:string): Table[string,allele_expr] =
+                  barcodeTable:TableRef,minbsq:int,bulkBam:bool,barcodeTag:string,startPos:int, stopPos:int,
+                  rec_alt:char, rec_ref:char): Table[string,allele_expr] =
   var alleleCountTable = initTable[string,allele_expr]()
-  var rec_alt:char
+  #var rec_alt:char
   var total_reads = 0
   var base_off: int
   var base: char
-  rec_alt = rec.ALT[0][0]
-  for aln in ibam.query(chrom = chrom,start = rec.POS.cint-1, stop = rec.POS.cint):
+  #rec_alt = rec.ALT[0][0]
+  for aln in ibam.query(chrom = chrom,start = startPos, stop = stopPos):
     var cbt = tag[string](aln, barcodeTag)
     var currentCB: string
     if cbt.isNone:
@@ -89,15 +90,14 @@ proc countAllele*(rec:Variant, ibam:Bam, maxTotalReads:int,
     if aln.flag.unmapped or aln.mapping_quality.cint < mapq or aln.flag.dup: continue
     ## skip unmapped, duplicated, mapq low reads or aln.flag.secondary or aln.flag.supplementary
     ## if not aln.flag.proper_pair: continue
-    if not barcodeTable.hasKey(currentCB): 
-      continue
-    base_off = get_base_offset(position = rec.POS.cint, align = aln) 
+    if not barcodeTable.hasKey(currentCB): continue
+    base_off = get_base_offset(position = stopPos, align = aln) 
     base = aln.base_at(base_off)
     if aln.base_quality_at(base_off).cint < minbsq: 
       continue
     total_reads+=1
     if alleleCountTable.hasKey(currentCB):
-      if aln.base_at(base_off) == rec.REF[0]: 
+      if aln.base_at(base_off) == rec_ref: 
         alleleCountTable[currentCB].inc_count_cref
         continue
       if aln.base_at(base_off) == rec_alt: 
@@ -106,7 +106,7 @@ proc countAllele*(rec:Variant, ibam:Bam, maxTotalReads:int,
     else:
       var new_snp = allele_expr(cref:0,calt:0)
       alleleCountTable[currentCB] = new_snp
-      if aln.base_at(base_off) == rec.REF[0]: 
+      if aln.base_at(base_off) == rec_ref: 
         alleleCountTable[currentCB].inc_count_cref
         continue
       if aln.base_at(base_off) == rec_alt: 
diff --git a/sgcocaller.nim b/sgcocaller.nim
index d381011c9677cb95666092c99e88db0b892395a8..fb06183fd25f9f3b11a7e4da57390d7d39c7093a 100755
--- a/sgcocaller.nim
+++ b/sgcocaller.nim
@@ -14,8 +14,12 @@ import math
 import streams
 import private/graph
 import private/findpath
+import private/blocks_utils
+import private/readfmf
+import private/phase_blocks
 
-proc getfmf(ibam:Bam, ivcf:VCF, barcodeTable:OrderedTableRef, outfmf:FileStream,maxTotalReads:int,
+
+proc getfmf(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outfmf:FileStream,maxTotalReads:int,
                   minTotalReads:int,  mapq: int,minbsq:int,mindp:int,barcodeTag:string, minsnpdepth:int):int = 
   var variantIndex = 1
   var cellFragmentsTable = newTable[string,Fragment]()
@@ -30,11 +34,46 @@ proc getfmf(ibam:Bam, ivcf:VCF, barcodeTable:OrderedTableRef, outfmf:FileStream,
     variantIndex = variantIndex + 1
   echo "processed " & $(variantIndex) & " variants"
   return 0
+proc phaseBlocks(fmf_file:string, inputVCF:string, outputVCF:string, block_hapfile:string, threads:int):int =
+  var cellGenoTable: TableRef[string,TableRef[string,int]]
+  var cellBlockLeftPhaseTable,cellBlockRightPhaseTable :TableRef[string, seq[int]]
+  var chr_blocks:seq[Block]
+  var blockkseq:seq[Block_linkage]
+  var phased_blocks: seq[int]
+  echo "Phasing for hapfile " & block_hapfile
+  cellGenoTable = newTable[string,TableRef[string,int]]()
+  discard readFmf(fmf_file,cellGenoTable)
+  chr_blocks = read_blocks(block_hapfile)
+  cellBlockLeftPhaseTable = newTable[string, seq[int]]()
+  cellBlockRightPhaseTable = newTable[string, seq[int]]()
+  ## blocks at the ends with 20 SNPs or fewer, removed?
+  ## blocks not at the ends with 20SNPs or fewer are not used for linking blocks but if their linkage with pre block and post-block is consistent, 
+  ## these short blocks' haplotypes are retained. 
+  ## 
+  ## What if blocks cannot be linked ? the number of 11(00) == 10(01)
+  echo "Total blocks " & $chr_blocks.len
+  var contig_blocks:seq[int]
+  for b_j, bl in chr_blocks:
+    echo "block " & $b_j & " len: " & $bl.h0.len
+    # echo "last 5" & $bl.h0[(high(bl.h0)-5)..high(bl.h0)]
+    # echo "first 5" & $bl.h0[0..5]
+    discard block_phase_in_cells(cellGenoTable, bl.snpPos, bl.h0, true, cellBlockLeftPhaseTable)
+    discard block_phase_in_cells(cellGenoTable, bl.snpPos, bl.h0, false, cellBlockRightPhaseTable)
+    contig_blocks = find_contig_blocks(chr_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
+  # echo "blocks for contigs" & $contig_blocks
+  blockkseq = build_contig(contig_blocks = contig_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
+  for bk in blockkseq:
+    echo "prevBlock" &  $bk.prevBlock & "nextBlock" &  $bk.nextBlock & " 00 type " & $ $bk.linkageType_00 &  " 01 type " & $bk.linkageType_01 &  " switch posterior prob " & $bk.switch_posterior_prob  & " switch value :" & $bk.switch
+  phased_blocks = infer_non_contig_blocks(contig_blocks, blockkseq, chr_blocks, cellBlockLeftPhaseTable, cellBlockRightPhaseTable)
+
+  # echo phased_blocks
 
-proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:OrderedTableRef,
+  discard  write_linked_blocks(inputVCF,outputVCF, blocks = chr_blocks, blocks_phase = phased_blocks, threads= threads)
+  return 0
+proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:TableRef,
                 ibam:Bam, out_dir:string, mapq:int, 
                 minbsq:int, mintotal:int, maxtotal:int, mindp:int, maxdp:int, 
-                thetaREF:float, thetaALT:float, cmPmb:float,s_Chrs:seq,barcodeTag:string): int =
+                thetaREF:float, thetaALT:float, cmPmb:float,s_Chrs:seq,barcodeTag:string,phased:bool): int =
   
   var bulkBam = false
   if barcodeTable.len == 1 and barcodeTable.hasKey("bulk"):
@@ -70,21 +109,31 @@ proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:OrderedTableRef,
       outFileStream.writeLine(' '.repeat(50))
     
     outFileSNPanno.writeLine(join(["POS","REF", "ALT"], sep = "\t"))
-
+    var ac = new_seq[int32](10)
     for rec in ivcf.query(chrom):
       if rec.ALT.len > 1 or rec.ALT.len == 0 : continue
       ## alleleCountTable contains for this rec.POS, each cell barcode's allele counts 
-      var alleleCountTable = countAllele(rec=rec, ibam=ibam, chrom=chrom, mapq=mapq, barcodeTable=barcodeTable,minbsq=minbsq,
-                                        maxTotalReads = maxtotal, minTotalReads = mintotal,bulkBam = bulkBam, barcodeTag = barcodeTag)
+      var rec_alt = rec.ALT[0][0]
+      var rec_ref = rec.REF[0]
+      if phased:
+        var f = rec.format()
+        var gts = f.genotypes(ac)
+        if ac[0] == 4 and ac[1] == 3:
+          # gt is 1|0
+            rec_alt = rec.REF[0]
+            rec_ref = rec.ALT[0][0]
+          # if not gt == 0|1 continue
+        elif not (ac[0] == 2 and ac[1] == 5): continue
+      var alleleCountTable = countAllele(ibam=ibam, chrom=chrom, mapq=mapq, barcodeTable=barcodeTable,minbsq=minbsq,
+                                        maxTotalReads = maxtotal, minTotalReads = mintotal,bulkBam = bulkBam, barcodeTag = barcodeTag, startPos = rec.POS.cint-1, stopPos=rec.POS.cint,rec_alt =  rec_alt, rec_ref = rec_ref)
       if alleleCountTable.len==0: continue
-      var rec_alt:char
-      rec_alt = rec.ALT[0][0]
+
       ## add to snpAnnoSeq, later write to SNPannot file, which contains SNP.pos, SNP.ref,SNP.alt; The rowAnnotations
       snpIndex += 1
-      outFileSNPanno.writeLine(join([$rec.POS, $rec.REF[0],$rec_alt], sep="\t") )
+      outFileSNPanno.writeLine(join([$rec.POS, $rec_ref, $rec_alt], sep="\t") )
       discard addViNode(barcodeTable = barcodeTable,  alleleCountTable = alleleCountTable,  scSpermSeq = scSpermSeq,
                outFileTotalCountMtx = outFileTotalCountMtx, outFileAltCountMtx  = outFileAltCountMtx, nnsize = nnsize,
-               mindp = mindp, maxdp = maxdp, thetaRef = thetaRef, thetaAlt = thetaAlt, snpIndex = snpIndex, rec=rec,
+               mindp = mindp, maxdp = maxdp, thetaRef = thetaRef, thetaAlt = thetaAlt, snpIndex = snpIndex, rec_pos=int(rec.POS),
                initProb = initProb, cmPmb = cmPmb)
     var posEnd: int64
     var inferProb,reverseProb = 0.0
@@ -124,13 +173,15 @@ proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:OrderedTableRef,
   return 0
 
 when(isMainModule):
-  let version = "0.3.2"
+  let version = "0.3.3"
   var doc = format("""
   $version
 
   Usage:
       sgcocaller fmf [options] <BAM> <VCF> <barcodeFile> <out_prefix>
       sgcocaller xo [options] <BAM> <VCF> <barcodeFile> <out_prefix>
+      sgcocaller phase_blocks [options] <VCF> <fmf> <hapfile> <out_vcf>
+      
 
 Arguments:
 
@@ -142,6 +193,10 @@ Arguments:
 
   <out_prefix>  the prefix of output files
 
+  <fmf> the fragment file from running sgcocaller fmf
+
+  <out_vcf> the output vcf aftering phasing blocks in hapfile
+
 
 Options:
   -t --threads <threads>  number of BAM decompression threads [default: 4]
@@ -157,21 +212,21 @@ Options:
   --thetaREF <thetaREF>  the theta for the binomial distribution conditioning on hidden state being REF [default: 0.1]
   --thetaALT <thetaALT>  the theta for the binomial distribution conditioning on hidden state being ALT [default: 0.9]
   --cmPmb <cmPmb>  the average centiMorgan distances per megabases default 0.1 cm per Mb [default: 0.1]
+  --phased  the VCF contains the phased GT of heterozygous
   -h --help  show help
 
 
   Examples
       ./sgcocaller fmf --threads 4 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/cellfragment.fmf
       ./sgcocaller xo --threads 4 AAAGTAGCACGTCTCT-1.raw.bam AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz barcodeFile.tsv ./percell/ccsnp
+      ./sgcocaller phase_blocks --threads 4 AAAGTAGCACGTCTCT-1.raw.bam.dp3.alt.vcf.gz ./percell/cellfragment.fmf ./percell/ccsnp/linkedBlocks.vcf.gz
+
 """ % ["version", version])
 
   let args = docopt(doc, version=version)
   
   var
     threads:int
-    vcff:string
-    barcodeFile:string
-    bamfile:string
     selectedChrs:string
     out_dir:string
     mapq:int
@@ -185,6 +240,9 @@ Options:
     cmPmb:float
     barcodeTag="CB"
     minsnpdepth:int
+    out_vcf,fmf,barcodeFile,bamfile,vcff,hapfile:string
+    vcfGtPhased = false
+
   echo $args
   threads = parse_int($args["--threads"])
   barcodeTag = $args["--barcodeTag"]
@@ -193,64 +251,77 @@ Options:
   maxtotal = parse_int($args["--maxTotalDP"])
   mintotal = parse_int($args["--minTotalDP"])
   minsnpdepth = parse_int($args["--minSNPdepth"])
-
-  bamfile = $args["<BAM>"]
-  barcodeFile = $args["<barcodeFile>"]
-  out_dir = $args["<out_prefix>"]
-  vcff = $args["<VCF>"]
   mapq = parse_int($args["--minMAPQ"])
   minbsq = parse_int($args["--baseq"])
   thetaRef = parse_float($args["--thetaREF"])
   thetaAlt = parse_float($args["--thetaALT"])
   cmPmb = parse_float($args["--cmPmb"])
+  vcfGtPhased = parse_bool($args["--phased"])
   
-  var 
-    ibam:Bam
-    ivcf:VCF
-    s_Chrs: seq[string]
-  if not open(ibam, bamfile, threads=threads, index = true):
-      quit "couldn't open input bam"
-  if not open(ivcf, vcff, threads=threads):
-      quit "couldn't open: vcf file"
-
-  if($args["--chrom"] != "nil"):
-    selectedChrs = $args["--chrom"]
-    s_Chrs = selectedChrs.split(',')
-  else:
-    ## find all chroms from headers of vcf file (Contigs)
-    let contigs = ivcf.contigs 
-    s_Chrs = map(contigs, proc(x: Contig): string = x.name)
-
-  var hf = hts.hts_open(cstring(barcodeFile), "r")
-  #### TODO : Table size
-  var barcodeTable =  newOrderedTable[string,int](initialSize = 1024)
-  var kstr: hts.kstring_t
-  kstr.l = 0
-  kstr.m = 0
-  kstr.s = nil
-  ## initiate the table with CB as keys, allele counts (ref object) as elements
-  while hts_getline(hf, cint(10), addr kstr) > 0:
-    if $kstr.s[0] == "#":
-      continue
-    var v = $kstr.s
-    discard barcodeTable.hasKeyOrPut(v, 0)
-  discard hf.hts_close()
-  if args["fmf"]:
-    echo "generating fragment file to " & out_dir
-    var outfmf:FileStream
-    try: 
-      outfmf = openFileStream(out_dir, fmWrite)
-    except:
-      stderr.write getCurrentExceptionMsg()
-    discard getfmf(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outfmf = outfmf, maxTotalReads = maxtotal,
-                   minTotalReads = mintotal, mapq = mapq, minbsq =minbsq, mindp = mindp, barcodeTag = barcodeTag,minsnpdepth=minsnpdepth)
-    close(outfmf)
-  if args["xo"]:
-    echo "running crossover calling \n"
-    echo "running for these chromosomes as specified in the header of the provided VCF file " & $s_Chrs
-    discard sgcocaller(threads, ivcf, barcodeTable, ibam,
-                     out_dir, mapq, minbsq, mintotal, 
-                     maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb,s_Chrs,barcodeTag)
-
-  ibam.close()
-  ivcf.close()
\ No newline at end of file
+  if args["fmf"] or args["xo"]:
+    var 
+      ibam:Bam
+      ivcf:VCF
+      s_Chrs: seq[string]
+    bamfile = $args["<BAM>"]
+    barcodeFile = $args["<barcodeFile>"]
+    out_dir = $args["<out_prefix>"]
+    vcff = $args["<VCF>"]
+
+    if not open(ibam, bamfile, threads=threads, index = true):
+        quit "couldn't open input bam"
+    if not open(ivcf, vcff, threads=threads):
+        quit "couldn't open: vcf file"
+
+    if($args["--chrom"] != "nil"):
+      selectedChrs = $args["--chrom"]
+      s_Chrs = selectedChrs.split(',')
+    else:
+      ## find all chroms from headers of vcf file (Contigs)
+      let contigs = ivcf.contigs 
+      s_Chrs = map(contigs, proc(x: Contig): string = x.name)
+
+    var hf = hts.hts_open(cstring(barcodeFile), "r")
+    #### TODO : Table size
+    var barcodeTable =  newTable[string,int](initialSize = 1024)
+    var kstr: hts.kstring_t
+    kstr.l = 0
+    kstr.m = 0
+    kstr.s = nil
+    var ithSperm = 0
+    ## initiate the table with CB as keys, allele counts (ref object) as elements
+    while hts_getline(hf, cint(10), addr kstr) > 0:
+      if $kstr.s[0] == "#":
+        continue
+      var v = $kstr.s
+      discard barcodeTable.hasKeyOrPut(v, ithSperm)
+      ithSperm.inc
+    discard hf.hts_close()
+    if args["fmf"]:
+      echo "generating fragment file to " & out_dir
+      var outfmf:FileStream
+      try: 
+        outfmf = openFileStream(out_dir, fmWrite)
+      except:
+        stderr.write getCurrentExceptionMsg()
+      discard getfmf(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outfmf = outfmf, maxTotalReads = maxtotal,
+                    minTotalReads = mintotal, mapq = mapq, minbsq =minbsq, mindp = mindp, barcodeTag = barcodeTag,minsnpdepth=minsnpdepth)
+      close(outfmf)
+    if args["xo"]:
+      echo "running crossover calling \n"
+      echo "running for these chromosomes as specified in the header of the provided VCF file " & $s_Chrs
+      discard sgcocaller(threads, ivcf, barcodeTable, ibam,
+                      out_dir, mapq, minbsq, mintotal, 
+                      maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb,s_Chrs,barcodeTag,phased = vcfGtPhased)
+    ibam.close()
+    ivcf.close()
+  if args["phase_blocks"]:
+    fmf = $args["<fmf>"]
+    out_vcf = $args["<out_vcf>"]
+    vcff = $args["<VCF>"]
+    hapfile =  $args["<hapfile>"]
+    echo "running phasing blocks \n"
+    discard phaseBlocks(fmf_file = fmf, inputVCF=vcff, outputVCF=out_vcf, block_hapfile=hapfile, threads=threads)
+
+
+ 
\ No newline at end of file