diff --git a/src/sgcocaller.nim b/src/sgcocaller.nim
index b6f7529476fac25973bdb959a675e12e684f004b..77942269236828ee62ba4d25f394db7a54807b5e 100755
--- a/src/sgcocaller.nim
+++ b/src/sgcocaller.nim
@@ -15,7 +15,7 @@ import sgcocaller/sgphase
 import sgcocaller/writeVCF
 import sgcocaller/correctPhase
 import sgcocaller/sgcocaller_sxo
-
+import times
 
 let initProb:array[stateRef..stateAlt, float]=[0.5,0.5]
 
@@ -26,18 +26,18 @@ proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:TableRef,
   
   var bulkBam = false
   if barcodeTable.len == 1 and barcodeTable.hasKey("bulk"):
-    echo "running in bulk mode and all reads are regarded as from one sample/cell"
+    echo $now() & " running in bulk mode and all reads are regarded as from one sample/cell"
     bulkBam = true
   else:
-    echo "running sgcoaller xo for " & $barcodeTable.len & " single cells"
+    echo $now() & " running sgcoaller xo for " & $barcodeTable.len & " single cells"
 
   ## iterate through each selected chromosome
   for chrom in s_Chrs:
     var snpIndex, nnsize = 0
     ## number of non zeros
-    var scSpermSeq:SeqSpermViNodes
+    var scSpermSeq = newTable[int,SpermViNodes]()
     ## matches with the order in barcodeTable
-    scSpermSeq.setLen(barcodeTable.len)
+    ## scSpermSeq.setLen(barcodeTable.len)
     var outFileSNPanno,outFileTotalCountMtx,outFileAltCountMtx,outFileVStateMtx,viSegmentInfo:FileStream
     let sparseMatrixHeader = "%%MatrixMarket matrix coordinate integer general"
     try:
@@ -91,17 +91,17 @@ proc sgcocaller(threads:int, ivcf:VCF, barcodeTable:TableRef,
   return 0
 
 when(isMainModule):
-  let version = "0.3.7"
+  let version = "0.3.9"
   var doc = format("""
   $version
   Usage:
+      sgcocaller autophase [options] <BAM> <VCF> <barcodeFile> <out_prefix>
       sgcocaller phase [options] <BAM> <VCF> <barcodeFile> <out_prefix> 
       sgcocaller swphase [options] <gtMtxFile> <phasedSnpAnnotFile> <referenceVCF> <out_prefix> 
       sgcocaller sxo [options] <SNPPhaseFile> <phaseOutputPrefix> <barcodeFile> <out_prefix>
       sgcocaller xo [options] <BAM> <VCF> <barcodeFile> <out_prefix>
       
 
-
 Arguments:
 
   <BAM> the read alignment file with records of single-cell DNA reads
@@ -112,10 +112,11 @@ Arguments:
 
   <out_prefix>  the prefix of output files
 
-  <fmf> the fragment file from running sgcocaller fmf
-
   <out_vcf> the output vcf aftering phasing blocks in hapfile
+  
+  <gtMtxFile> the output gtMtx.mtx file from running sgcocaller phase
 
+  <phasedSnpAnnotFile>  the output phased_snpAnnot.txt from running sgcocaller phase
 
 Options:
   -t --threads <threads>  number of BAM decompression threads [default: 4]
@@ -142,28 +143,34 @@ Options:
   --minPositiveSwitchScores <minPositiveSwitchScores>  the min number of continuing SNPs with positive switch scores to do switch error correction [default: 8]  
   --binSize <binSize>  the size of SNP bins for scanning swith errors, users are recommended to increase this option when SNP density is high. [default: 2000]
   --stepSize <stepSize>  the move step size used in combination with --binSize. [default: 200]
+  --dissimThresh <dissimThresh>  the threshold used on the allele concordance ratio for determining if a SNP bin contains a crossover. [default: 0.0099]
+  --batchSize <batchSize>  the number of cells to process in one batch when running sxo. This option is only needed when the memory is limited. 
+  --notSortMtx  do not sort the output mtx. 
+  --maxUseNcells <maxUseNcells>  the number of cells to use for calculating switch scores. All cells are used if not set
   -h --help  show help
 
 
   Examples
-      ./sgcocaller phase gtMtxFile phaseOutputPrefix
+      ./sgcocaller autophase possorted_bam.bam hetSNPs.vcf.gz barcodeFile.tsv phaseOutputPrefix
+      ./sgcocaller phase possorted_bam.bam hetSNPs.vcf.gz barcodeFile.tsv phaseOutputPrefix
       ./sgcocaller xo --threads 4 possorted_bam.bam phased_hetSNPs.vcf.gz barcodeFile.tsv ./percell/ccsnp
-      ./sgcocaller sxo phaseOutputPrefix barcodeFile.tsv ./percell/ccsnp
+      ./sgcocaller sxo snp_phase.txt phaseOutputPrefix barcodeFile.tsv ./percell/ccsnp
 
 """ % ["version", version])
 
   let args = docopt(doc, version=version)
   
   var
-    threads,mapq,minbsq,mintotal,maxtotal,mindp,maxdp,minsnpdepth,maxExpand,lookBeyondSnps,minPositiveSwitchScores,binSize,stepSize:int
-    thetaREF,thetaALT,cmPmb,posteriorProbMin,maxDissim,minSwitchScore:float
+    threads,mapq,minbsq,mintotal,maxtotal,mindp,maxdp,minsnpdepth,maxExpand,lookBeyondSnps,minPositiveSwitchScores,binSize,stepSize,batchSize,maxUseNcells:int
+    thetaREF,thetaALT,cmPmb,posteriorProbMin,maxDissim,minSwitchScore,dissimThresh:float
     barcodeTag="CB"
     out_dir,selectedChrs,barcodeFile,bamfile,vcff:string
-    vcfGtPhased = false
+    vcfGtPhased,notSortMtx = false
     s_Chrs: seq[string]
     outFileTotalCountMtxFile, outFileAltCountMtxFile: string
 
-  echo $args
+  echo $now() & $args
+  echo $now() & " running sgcocaller " & $version
   threads = parse_int($args["--threads"])
   barcodeTag = $args["--barcodeTag"]
   mindp = parse_int($args["--minDP"])
@@ -177,18 +184,33 @@ Options:
   lookBeyondSnps = parse_int($args["--lookBeyondSnps"])
   binSize = parse_int($args["--binSize"])
   stepSize = parse_int($args["--stepSize"])
+  dissimThresh = parse_float($args["--dissimThresh"])
   minPositiveSwitchScores =  parse_int($args["--minPositiveSwitchScores"])
   thetaRef = parse_float($args["--thetaREF"])
   thetaAlt = parse_float($args["--thetaALT"])
   posteriorProbMin = parse_float($args["--posteriorProbMin"])
   maxDissim = parse_float($args["--maxDissim"])
   minSwitchScore = parse_float($args["--minSwitchScore"])
+  if $args["--maxUseNcells"] != "nil": maxUseNcells = parse_int($args["--maxUseNcells"])
   cmPmb = parse_float($args["--cmPmb"])
   vcfGtPhased = parse_bool($args["--phased"])
+  notSortMtx = parse_bool($args["--notSortMtx"])
   out_dir = $args["<out_prefix>"]
   if (out_dir[^1] != '_') and (out_dir[^1] != '/') : out_dir = out_dir & "_"
-
-  if args["phase"] or args["xo"] or args["sxo"]:
+  if $args["--batchSize"] != "nil":
+    batchSize = parse_int($args["--batchSize"])
+  else:
+    batchSize = 0
+  var run_phase, run_swphase, run_xo, run_sxo, run_auto_phase = false
+  run_phase = args["phase"]
+  run_swphase = args["swphase"]
+  run_xo = args["xo"]
+  run_sxo = args["sxo"]
+  run_auto_phase = args["autophase"]
+  if run_auto_phase:
+    run_phase = true
+    run_swphase = true
+  if run_phase or run_xo or run_sxo:
     barcodeFile = $args["<barcodeFile>"]
     var hf = hts.hts_open(cstring(barcodeFile), "r")
     var barcodeTable =  newTable[string,int](initialSize = 1024)
@@ -204,7 +226,7 @@ Options:
       discard barcodeTable.hasKeyOrPut(v, ithSperm)
       ithSperm.inc
     discard hf.hts_close()
-    if args["phase"] or args["xo"]:
+    if run_phase or run_xo:
       var 
         ibam:Bam
         ivcf:VCF
@@ -221,12 +243,13 @@ Options:
           quit "couldn't open input bam"
       if not open(ivcf, vcff, threads=threads):
           quit "couldn't open: vcf file"
-      if args["phase"]:
+      if run_phase:
+        echo $now() & " running phase"
         var templateCell = parse_int($args["--templateCell"])     
         var outGtMtxFile, outTotalCountMtxFile,outAltCountMtxFile, outSnpAnnotFile, outphasedSnpAnnotFile,outdiagnosticDataframeFile : string
         var outGtMtx, outSnpAnnot, outTotalCountMtx, outAltCountMtx:FileStream
         for chrom in s_Chrs:
-          echo "generating genotype sparse matrix file to " & out_dir & "for chr " & chrom
+          echo $now() & " generating genotype sparse matrix file to " & out_dir & "for chr: " & chrom
           outGtMtxFile = out_dir & chrom & "_gtMtx.mtx"
           outTotalCountMtxFile = out_dir & chrom &  "_totalMtx.mtx"
           outAltCountMtxFile = out_dir & chrom &  "_altMtx.mtx"
@@ -240,66 +263,80 @@ Options:
             outAltCountMtx = openFileStream(outAltCountMtxFile, fmWrite)
           except:
             stderr.write getCurrentExceptionMsg()
-          discard getGtMtx(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outGtMtx = outGtMtx, 
-                          outTotalCountMtx = outTotalCountMtx ,outAltCountMtx = outAltCountMtx,
-                          outSnpAnnot = outSnpAnnot, maxTotalReads = maxtotal,
-                          minTotalReads = mintotal, mapq = mapq, minbsq = minbsq, minCellDp = mindp,maxCellDp =maxdp,
-                          barcodeTag = barcodeTag, minsnpdepth=minsnpdepth, chrom = chrom)                    
-          for mtxFile in [outGtMtxFile, outTotalCountMtxFile, outAltCountMtxFile]:
-            var imtx = readMtx(mtx_file = mtxFile)
-            discard sortWriteMtx(imtx, mtx_file = mtxFile) 
-          echo "running sgcocaller phase from single cell genotype matrix (of one chromosome) " & outGtMtxFile
-          echo "using the " & outSnpAnnotFile & " for generating phased haplotypes"
+          discard getGtMtx(ibam = ibam, ivcf = ivcf, barcodeTable = barcodeTable, outGtMtx = outGtMtx, outTotalCountMtx = outTotalCountMtx ,outAltCountMtx = outAltCountMtx, outSnpAnnot = outSnpAnnot, maxTotalReads = maxtotal, minTotalReads = mintotal, mapq = mapq, minbsq = minbsq, minCellDp = mindp,maxCellDp = maxdp,barcodeTag = barcodeTag, minsnpdepth = minsnpdepth, chrom = chrom)                    
+          if not notSortMtx:
+            for mtxFile in [outGtMtxFile, outTotalCountMtxFile, outAltCountMtxFile]:
+              var imtx = readMtx(mtx_file = mtxFile)
+              discard sortWriteMtx(imtx, mtx_file = mtxFile) 
+          echo $now() & "running sgcocaller phase from single cell genotype matrix (of one chromosome) " & outGtMtxFile
+          echo $now() & "using the " & outSnpAnnotFile & " for generating phased haplotypes"
           discard sgphase(mtxFile = outGtMtxFile, snpAnnotFile = outSnpAnnotFile, phasedSnpAnnotFile = outphasedSnpAnnotFile,
                           diagnosticDataframeFile = outdiagnosticDataframeFile, templateCell = templateCell, 
                           maxExpand = maxExpand, posteriorProbMin = posteriorProbMin,maxDissim = maxDissim)
-          if parse_bool($args["--outvcf"]):
-            var outvcfFile = out_dir & chrom &  "_phased_snpAnnot.vcf.gz"   
+          if parse_bool($args["--outvcf"]) and (not run_swphase):
+            var outvcfFile = out_dir & chrom &  "_phased_snpAnnot.vcf.gz"  
+            echo $now() & " write phased haplotypes to VCF " & outvcfFile
             discard writePhaseToVCF(vcff, outvcfFile, outphasedSnpAnnotFile,threads = threads)    
-      elif args["xo"]:
-        echo "running crossover calling from VCF and BAM file\n"
-        echo "running for these chromosomes: " & $s_Chrs
-        discard sgcocaller(threads, ivcf, barcodeTable, ibam,
-                        out_dir, mapq, minbsq, mintotal, 
-                        maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb, s_Chrs,barcodeTag,phased = vcfGtPhased)
+          if run_swphase:
+            echo $now() & " running swphase to find switch spots and generate corrected phase"
+            let switchedPhasedAnnotFile = out_dir & chrom & "_corrected_phased_snpAnnot.txt"
+            let switchScoreFile = out_dir & chrom & "_switch_score.txt" 
+            let switchedPhasedAnnotVcfFile  = out_dir & chrom & "_corrected_phased_snpAnnot.vcf.gz"
+            echo $now() & "scanning with swphase to find switch spots and generate corrected phase"
+            discard correctPhase(outGtMtxFile, outphasedSnpAnnotFile, switchedPhasedAnnotFile, switchScoreFile, lookBeyondSnps = lookBeyondSnps, minSwitchScore = minSwitchScore,
+                                 minPositiveSwitchScores = minPositiveSwitchScores, binSize = binSize, stepSize = stepSize,dissimThresh = dissimThresh, maxUseNcells = maxUseNcells )
+            echo $now() & " write corrected phased haplotypes to " & switchedPhasedAnnotVcfFile
+            discard writePhaseToVCF(vcff, switchedPhasedAnnotVcfFile, switchedPhasedAnnotFile, add_header_string = """##sgcocaller_v0.3.9=swphase""",threads = threads, chrom = chrom)    
+      elif run_xo:
+        echo $now() & " running crossover calling from VCF and BAM file\n"
+        echo $now() & " running for these chromosomes: " & $s_Chrs
+        discard sgcocaller(threads, ivcf, barcodeTable, ibam, out_dir, mapq, minbsq, mintotal, maxtotal, mindp, maxdp, thetaREF, thetaALT, cmPmb, s_Chrs, barcodeTag, phased = vcfGtPhased)
         ## sort entries in mtx files 
-        for chrom in s_Chrs:
-          outFileTotalCountMtxFile = out_dir & chrom & "_totalCount.mtx"
-          outFileAltCountMtxFile = out_dir & chrom & "_altCount.mtx"
-          for mtxFile in [outFileTotalCountMtxFile, outFileAltCountMtxFile]:
-            var imtx = readMtx(mtx_file = mtxFile)
-            discard sortWriteMtx(imtx, mtx_file = mtxFile)
+        if not notSortMtx:
+          for chrom in s_Chrs:
+            outFileTotalCountMtxFile = out_dir & chrom & "_totalCount.mtx"
+            outFileAltCountMtxFile = out_dir & chrom & "_altCount.mtx"
+            for mtxFile in [outFileTotalCountMtxFile, outFileAltCountMtxFile]:
+              var imtx = readMtx(mtx_file = mtxFile)
+              discard sortWriteMtx(imtx, mtx_file = mtxFile)
       ibam.close()
       ivcf.close()
-    elif args["sxo"]:
+    elif run_sxo:
       if($args["--chrom"] != "nil"):
         selectedChrs = $args["--chrom"]
         s_Chrs = selectedChrs.split(',')
       else:
         quit "chromosomes need to be supplied via --chrom. Supply multiple chromosomes (will be processed sequencially) using comma as separator"
-      echo "running crossover calling from genotype and allele count matrices generated from sgcocaller phase/swphase "
-      echo "running for these chromosomes : " & $s_Chrs
+      echo $now() & " running crossover calling from genotype and allele count matrices generated from sgcocaller phase/swphase "
+      echo $now() & " running for these chromosomes : " & $s_Chrs
       let phase_dir = $args["<phaseOutputPrefix>"]
       let phasedSnpAnnotFile = $args["<SNPPhaseFile>"]
-      discard sgcocallerSXO(barcodeTable = barcodeTable, phase_dir = phase_dir, out_dir = out_dir,thetaREF = thetaREF, thetaALT = thetaALT, cmPmb = cmPmb,s_Chrs =s_Chrs,initProb = initProb, phasedSnpAnnotFileName = phasedSnpAnnotFile)
-      for chrom in s_Chrs:
-        outFileTotalCountMtxFile = out_dir & chrom & "_totalCount.mtx"
-        outFileAltCountMtxFile = out_dir & chrom & "_altCount.mtx"
-        for mtxFile in [outFileTotalCountMtxFile, outFileAltCountMtxFile]:
-          var imtx = readMtx(mtx_file = mtxFile)
-          discard sortWriteMtx(imtx, mtx_file = mtxFile)
-  elif args["swphase"]:
-    echo "running swphase to find switch spots and generate corrected phase"
+      if (batchSize == 0) or (batchSize >= barcodeTable.len):
+        batchSize = barcodeTable.len        
+      discard sgcocallerSXO(barcodeTable = barcodeTable, phase_dir = phase_dir, out_dir = out_dir,
+                            thetaREF = thetaREF, thetaALT = thetaALT, cmPmb = cmPmb,s_Chrs =s_Chrs,initProb = initProb,
+                            phasedSnpAnnotFileName = phasedSnpAnnotFile,batchSize=batchSize)
+      if not notSortMtx:
+        for chrom in s_Chrs:
+          outFileTotalCountMtxFile = out_dir & chrom & "_totalCount.mtx"
+          outFileAltCountMtxFile = out_dir & chrom & "_altCount.mtx"
+          for mtxFile in [outFileTotalCountMtxFile, outFileAltCountMtxFile]:
+            var imtx = readMtx(mtx_file = mtxFile)
+            discard sortWriteMtx(imtx, mtx_file = mtxFile)
+  elif run_swphase:
+    echo $now() & " running swphase to find switch spots and generate corrected phase"
     let gtMtxFile =  $args["<gtMtxFile>"]
     let phasedSnpAnnotFile = $args["<phasedSnpAnnotFile>"]
     let switchedPhasedAnnotFile = out_dir & "corrected_phased_snpAnnot.txt"
     let switchScoreFile = out_dir & "switch_score.txt" 
     let switchedPhasedAnnotVcfFile  = out_dir & "corrected_phased_snpAnnot.vcf.gz"
-    discard correctPhase(gtMtxFile,phasedSnpAnnotFile,switchedPhasedAnnotFile,switchScoreFile,lookBeyondSnps = lookBeyondSnps,minSwitchScore = minSwitchScore, minPositiveSwitchScores = minPositiveSwitchScores, binSize = binSize, stepSize = stepSize)
+    echo $now() & " scanning with swphase to find switch spots and generate corrected phase"
+    discard correctPhase(gtMtxFile,phasedSnpAnnotFile,switchedPhasedAnnotFile,switchScoreFile,lookBeyondSnps = lookBeyondSnps,minSwitchScore = minSwitchScore, 
+                         minPositiveSwitchScores = minPositiveSwitchScores, binSize = binSize, stepSize = stepSize, dissimThresh =dissimThresh,maxUseNcells = maxUseNcells)
     if ($args["--chrom"] == "nil"):
-      echo "Assuming supplied VCF only contains the SNPs for the relevant chromosome. If this is not the case, use the --chrom option"
-    
-    discard writePhaseToVCF($args["<referenceVCF>"], switchedPhasedAnnotVcfFile, switchedPhasedAnnotFile, add_header_string = """##sgcocaller_v0.1=swphase""",threads = threads, chrom = $args["--chrom"])
+      echo $now() & " assuming supplied VCF only contains the SNPs for the relevant chromosome. If this is not the case, use the --chrom option"
+    echo $now() & " write corrected phased haplotypes to " & switchedPhasedAnnotVcfFile
+    discard writePhaseToVCF($args["<referenceVCF>"], switchedPhasedAnnotVcfFile, switchedPhasedAnnotFile, add_header_string = """##sgcocaller_v0.3.9=swphase""",threads = threads, chrom = $args["--chrom"])
 
 
 
diff --git a/src/sgcocaller/correctPhase.nim b/src/sgcocaller/correctPhase.nim
index d5046d19fff22cb692905c593afc891ba4827c69..86a3b2316eb63858cac7cb2cb2cdc39978bdc3a4 100755
--- a/src/sgcocaller/correctPhase.nim
+++ b/src/sgcocaller/correctPhase.nim
@@ -4,13 +4,16 @@ import sequtils
 import math
 import streams
 import strutils
+import times
+
 
 # let binSize = 2000
 # let movingStep = 200
-let dissimThresh = 0.0099 
+#let dissimThresh = 0.0099 
 #let lookBeyondSnps = 25
 let debug = false
 let switchPrior = 0.5
+#let maxUseNcells = 100
 type 
   switchScoreTuple = tuple
     switch_scores: seq[float]
@@ -24,7 +27,7 @@ type
 ## simply comparing the dissimilary btw template geno seq with cell's geno seq for the SNPs in the bin
 
 ## cell_geno snp by cell
-proc hasCrossover(templ_geno:seq[BinaryGeno], cell_geno: seq[seq[BinaryGeno]]): bool = 
+proc hasCrossover(templ_geno:seq[BinaryGeno], cell_geno: seq[seq[BinaryGeno]], dissimThresh: float): bool = 
   let ncells = cell_geno[0].len
   var ncompared = newSeq[int](ncells)
   var nmatch = newSeq[int](ncells)
@@ -46,33 +49,36 @@ proc hasCrossover(templ_geno:seq[BinaryGeno], cell_geno: seq[seq[BinaryGeno]]):
 #    if debug: echo "bin had many crossovers : " & $nxcells
     return true
 
-proc findHighRiskSnps(fullGeno:seq[BinaryGeno], gtMtxByCell:seq[seq[BinaryGeno]], binSize:int, movingStep:int): seq[int] = 
+proc findHighRiskSnps(fullGeno:seq[BinaryGeno], gtMtxByCell:seq[seq[BinaryGeno]], binSize:int, movingStep:int, dissimThresh:float): seq[int] = 
   #let ncells = gtMtxByCell[0].len
   let nSnps = gtMtxByCell.len
   let nBins = (int)(ceil(nSnps / (binSize - movingStep)))
-#  if debug: echo "nBins: " & $nBins
+#  echo "nBins: " & $nBins
   let binIds = toSeq(0..(nBins-1))
   var snpPosStart,snpPosEnd: int
   var highRiskSnps: seq[int]
   for binI in binIds:
-#    if debug: echo "binI " & $binI
+#    echo "binI " & $binI
     snpPosStart = (binI)*(binSize - movingStep)
     if (snpPosStart + binSize) > (nSnps-1): 
       snpPosEnd = (nSnps-1)
     else:
       snpPosEnd = snpPosStart + binSize
     if (hasCrossover(templ_geno =  fullGeno[snpPosStart..snpPosEnd], 
-                     cell_geno =  gtMtxByCell[snpPosStart..snpPosEnd] )):
+                     cell_geno =  gtMtxByCell[snpPosStart..snpPosEnd],
+                     dissimThresh = dissimThresh )):
       highRiskSnps = concat(highRiskSnps,toSeq(snpPosStart..snpPosEnd))
   
   # add a last bin as from end of chrom with length binSize
   let lastBinStart = nSnps - binSize
-  let lastBinEnd = nSnps - 1
-  if (hasCrossover(templ_geno =  fullGeno[snpPosStart..snpPosEnd], 
-                     cell_geno =  gtMtxByCell[snpPosStart..snpPosEnd])):
-    highRiskSnps = concat(highRiskSnps,toSeq(lastBinStart..lastBinEnd))
-
+  if lastBinStart > 0:
+    let lastBinEnd = nSnps - 1
+    if (hasCrossover(templ_geno =  fullGeno[snpPosStart..snpPosEnd], 
+                    cell_geno =  gtMtxByCell[snpPosStart..snpPosEnd],
+                    dissimThresh = dissimThresh)):
+      highRiskSnps = concat(highRiskSnps,toSeq(lastBinStart..lastBinEnd))
   highRiskSnps = deduplicate(highRiskSnps)
+# echo highRiskSnps
   return highRiskSnps
    
 proc readPhasedSnpAnnot(phasedSnpAnnotFileStream: FileStream, nSnps:int): seq[BinaryGeno] =
@@ -81,7 +87,7 @@ proc readPhasedSnpAnnot(phasedSnpAnnotFileStream: FileStream, nSnps:int): seq[Bi
   var currentEntrySeq: seq[string]
   while not phasedSnpAnnotFileStream.atEnd():
     currentEntrySeq = phasedSnpAnnotFileStream.readLine().splitWhitespace()
-    fullGeno[i] = parseInt(currentEntrySeq[3]).toBinaryGeno(format = "01")
+    fullGeno[i] = int8(parseInt(currentEntrySeq[3])).toBinaryGeno(format = "01")
     i.inc
   if i != nSnps :
     quit "phased snpAnnot file does not have the same number of rows with gtMtx"
@@ -126,7 +132,7 @@ proc switchHap(hap: seq[BinaryGeno]): seq[BinaryGeno] =
 proc getIthCellHap(gtMtxByCell:seq[seq[BinaryGeno]],cellIndex:int):seq[BinaryGeno] = 
   map(gtMtxByCell, proc(y: seq[BinaryGeno]): BinaryGeno = y[cellIndex])
 
-proc calSwitchScore(riskySnps: seq[int], gtMtxByCell:seq[seq[BinaryGeno]], fullGeno: seq[BinaryGeno], lookBeyondSnps = 25): switchScoreTuple = 
+proc calSwitchScore(riskySnps: seq[int], gtMtxByCell:seq[seq[BinaryGeno]], fullGeno: seq[BinaryGeno], lookBeyondSnps = 25, maxUseNcells:int): switchScoreTuple = 
   var letfIndexStart,letfIndexEnd,rightIndexStart,rightIndexEnd,offset: int
   let nSnps = gtMtxByCell.len
   let ncells =  gtMtxByCell[0].len
@@ -134,11 +140,17 @@ proc calSwitchScore(riskySnps: seq[int], gtMtxByCell:seq[seq[BinaryGeno]], fullG
   var prob_switch, prob_nonsw: float
   var leftIndex,rightIndex, switch_score_snpIndex: seq[int]
   var swscore = newSeq[float](nSnps)
+  var useCells = newSeq[int]()
+  if (ncells <= maxUseNcells) or (maxUseNcells==0):
+    useCells = (0..(ncells-1)).toSeq()
+  else:
+    let everyN = (int)floor(ncells/maxUseNcells)
+    useCells = map(toSeq(0..(maxUseNcells-1)),proc(x:int):int = (x * everyN))
   for rsnpi in riskySnps:
     prob_switch = 0.0
     prob_nonsw = 0.0
     if fullGeno[rsnpi] == gUnknown: continue
-    for celli in 0..(ncells-1):
+    for celli in useCells:
       # if celli == 2: continue
       leftIndex = newSeq[int]()
       rightIndex = newSeq[int]()
@@ -234,7 +246,7 @@ proc writeSwitchedPhase(siteToSwitch:seq[int],switchedPhasedAnnotFile:string, ph
   phaseSnpAnnotFileFS.close()
   return 0
 
-proc correctPhase*(gtMtxFile: string, phaseSnpAnnotFile:string, switchedPhasedAnnotFile:string, switchScoreFile: string, lookBeyondSnps = 25,minSwitchScore:float, minPositiveSwitchScores:int, binSize:int, stepSize:int): int = 
+proc correctPhase*(gtMtxFile: string, phaseSnpAnnotFile:string, switchedPhasedAnnotFile:string, switchScoreFile: string, lookBeyondSnps = 25,minSwitchScore:float, minPositiveSwitchScores:int, binSize:int, stepSize:int, dissimThresh:float, maxUseNcells:int): int = 
   var gtMtxByCell: seq[seq[BinaryGeno]]
   var currentEntrySeq: seq[string]
   var currentEntry:seq[int]
@@ -247,25 +259,26 @@ proc correctPhase*(gtMtxFile: string, phaseSnpAnnotFile:string, switchedPhasedAn
     switchScoreFileStream = openFileStream(switchScoreFile, fmWrite)
   except:
     stderr.write getCurrentExceptionMsg() 
-  echo "reading gtMtx by cell"
+  echo $now() & "reading gtMtx by cell"
   discard gtMtxFileStream.readLine()
   discard phaseSnpAnnotFileStream.readLine()
   #N, i,j
   currentEntrySeq = gtMtxFileStream.readLine().splitWhitespace()
   currentEntry = map(currentEntrySeq, proc(x: string): int = parseInt(x))
   nSnps = currentEntry[0]
-  echo "nSnps " & $nSnps
+  echo "nSnps: " & $nSnps
   ncells = currentEntry[1]
-  echo "ncells " & $ncells
+  echo "ncells: " & $ncells
   echo "totalEntries " & $ currentEntry[2]
   ## gtMtx is cell by Snp format
   gtMtxByCell = newSeqWith(nSnps,newSeq[BinaryGeno](ncells))
   discard readGtMtxToSeq(mtxFileStream = gtMtxFileStream, gtMtx = gtMtxByCell, by_cell = true)
   var fullGeno = readPhasedSnpAnnot(phasedSnpAnnotFileStream = phaseSnpAnnotFileStream, nSnps = nSnps )
-  var riskySnps = findHighRiskSnps(fullGeno = fullGeno, gtMtxByCell = gtMtxByCell, binSize = binSize, movingStep = stepSize)
-  echo "riskySnps.length " & $riskySnps.len
-  var switchScoresT = calSwitchScore(riskySnps = riskySnps, gtMtxByCell =gtMtxByCell, fullGeno = fullGeno, lookBeyondSnps = lookBeyondSnps)
+  var riskySnps = findHighRiskSnps(fullGeno = fullGeno, gtMtxByCell = gtMtxByCell, binSize = binSize, movingStep = stepSize, dissimThresh = dissimThresh)
+  echo $now() & "riskySnps.length " & $riskySnps.len
+  var switchScoresT = calSwitchScore(riskySnps = riskySnps, gtMtxByCell = gtMtxByCell, fullGeno = fullGeno, lookBeyondSnps = lookBeyondSnps,  maxUseNcells= maxUseNcells)
   let switchSites = findSwitchSites(switchScoresT, lookBeyondSnps = lookBeyondSnps,minSwitchScore = minSwitchScore, minPositiveSwitchScores = minPositiveSwitchScores)
+  echo "see switch_sore.txt file, and corrected switch errors found at positions: " & $switchSites
   switchScoreFileStream.writeLine("#" & $switchSites)
   for snpi,score in switchScoresT.switch_scores:
     if switchScoresT.switch_scores_snpIndex.find(snpi)>=0:
diff --git a/src/sgcocaller/findGtNodes.nim b/src/sgcocaller/findGtNodes.nim
index 8885112ed7f81c7aadf0e9802d265f08b467e627..ca3c146fcf08d7a8d866bf0e381fc1872c154b9f 100755
--- a/src/sgcocaller/findGtNodes.nim
+++ b/src/sgcocaller/findGtNodes.nim
@@ -31,7 +31,7 @@ proc findGtNodes*(rec:Variant, variantIndex: int, ibam:Bam, maxTotalReads:int, m
     if not barcodeTable.hasKey(currentCB): 
       continue
     base_off = get_base_offset(position = rec.POS.cint, align = aln) 
-    base = aln.base_at(base_off)
+    base = aln.base_at(base_off)   
     if aln.base_quality_at(base_off).cint < minbsq: 
       continue
     total_reads+=1
diff --git a/src/sgcocaller/findPath.nim b/src/sgcocaller/findPath.nim
index 4b5ba6e10199d6d2c936d0287d31cbfb6eb12954..0a3013e8b94c3e4f190cbc388fff846e15e10302 100755
--- a/src/sgcocaller/findPath.nim
+++ b/src/sgcocaller/findPath.nim
@@ -101,26 +101,25 @@ proc pathTrackBackIthSperm(currentSperm: var SpermViNodes,
       inferProb = currentEm[stateAlt]+math.ln(transProb)
       reverseProb = currentEm[stateRef]+math.ln(1-transProb)
       viSegmentInfo.writeLine(join(["ithSperm" & $ithSperm, $posStart, $posEnd, $(inferProb-reverseProb) ,$cSNP, "2"],sep = " ") )
-  
   return 0
-proc pathTrackBack*(scSpermSeq:  var SeqSpermViNodes,
+proc pathTrackBack*(scSpermSeq:  TableRef[int,SpermViNodes],
                     thetaRef: float,
                     thetaAlt: float,
                     cmPmb: float,
                     outFileVStateMtx: var FileStream,
                     viSegmentInfo: var FileStream): int = 
-
   var posEnd: int64
   var inferProb,reverseProb = 0.0
   var currentEm: array[stateRef..stateAlt, float]
   var lastNode: ViNode
-  var spermVNseq: SpermViNodes
+#  var spermVNseq: SpermViNodes
+# 0..(scSpermSeq.len-1)
+# rightGap,leftGap = [0.0,0.0]
+# spermVNseq = scSpermSeq[ithSperm]
   var ithSNP: int
-
-  for ithSperm in 0..(scSpermSeq.len-1):
-    ## rightGap,leftGap = [0.0,0.0]
-    spermVNseq = scSpermSeq[ithSperm]
-    if spermVNseq.viNodeseq.len==0: continue
+  for ithSperm,spermVNseq in scSpermSeq:
+    if spermVNseq.viNodeseq.len==0: 
+      continue
     lastNode = spermVNseq.viNodeseq[high(spermVNseq.viNodeseq)]
     currentEm = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,cRef=lastNode.cRef, cAlt=lastNode.cAlt)
     if lastNode.pathScore[stateRef] > lastNode.pathScore[stateAlt]:
diff --git a/src/sgcocaller/getGtMtx.nim b/src/sgcocaller/getGtMtx.nim
index 90816bc2425a67e962ab05d70598d82062774a49..7d4e84cbc12c2c365ddff2c11cae065ac9884b95 100755
--- a/src/sgcocaller/getGtMtx.nim
+++ b/src/sgcocaller/getGtMtx.nim
@@ -30,9 +30,7 @@ proc getGtMtx*(ibam:Bam, ivcf:VCF, barcodeTable:TableRef, outGtMtx:FileStream, o
   for rec in ivcf.query(chrom): 
     var rec_alt = rec.ALT[0][0]
     var rec_ref = rec.REF[0]
-    cellGtNodes = findGtNodes(rec=rec, variantIndex = variantIndex, ibam=ibam, maxTotalReads=maxTotalReads,
-                                  minTotalReads=minTotalReads, mapq = mapq, barcodeTable = barcodeTable,
-                                  minbsq=minbsq,barcodeTag=barcodeTag)
+    cellGtNodes = findGtNodes(rec=rec, variantIndex = variantIndex, ibam=ibam, maxTotalReads=maxTotalReads,minTotalReads=minTotalReads, mapq = mapq, barcodeTable = barcodeTable,minbsq=minbsq,barcodeTag=barcodeTag)
     cellGtNodeCopy = cellGtNodes
     for cellbarcode in cellGtNodeCopy.keys:
       cellDP = cellGtNodes[cellbarcode].alleles.len
diff --git a/src/sgcocaller/graph.nim b/src/sgcocaller/graph.nim
index 215e1e64d11fef79e6eddf6d880bc3e340315066..83e88e63f76a2d214006ffbf9140bc8d5831d795 100755
--- a/src/sgcocaller/graph.nim
+++ b/src/sgcocaller/graph.nim
@@ -13,10 +13,11 @@ type
     snpIndexLookUp*: Table[int,int]
     ## look up from current Sperm SNP index to SNPIndex  
     spermSnpIndexLookUp*: Table[int,int]
-  SeqSpermViNodes* = seq[SpermViNodes]
+# SeqSpermViNodes* = seq[SpermViNodes]
 
-proc addViNodeIthSperm*(scSpermSeq: var SeqSpermViNodes, cAlt: int, cRef: int, ithSperm:int, emissionArray: array[stateRef..stateAlt, float], snpIndex:int, initProb: array[stateRef..stateAlt, float], rec_pos:int,cmPmb:float): int = 
-  if scSpermSeq[ithSperm].viNodeseq.len==0:
+proc addViNodeIthSperm*(scSpermSeq: TableRef[int,SpermViNodes], cAlt: int16, cRef: int16, ithSperm:int, emissionArray: array[stateRef..stateAlt, float], snpIndex:int, initProb: array[stateRef..stateAlt, float], rec_pos:int,cmPmb:float): int = 
+  if not scSpermSeq.hasKey(ithSperm):
+    scSpermSeq[ithSperm] = SpermViNodes()
     var currentViNode = ViNode()
     currentViNode.pathScore[stateRef] = math.ln(initProb[stateRef])+emissionArray[stateRef]
     currentViNode.pathScore[stateAlt] = math.ln(initProb[stateAlt])+emissionArray[stateAlt]
@@ -26,7 +27,6 @@ proc addViNodeIthSperm*(scSpermSeq: var SeqSpermViNodes, cAlt: int, cRef: int, i
     currentViNode.pos = rec_pos
     currentViNode.cAlt = cAlt
     currentViNode.cRef = cRef
-    
     scSpermSeq[ithSperm].viNodeseq.add(currentViNode)
     scSpermSeq[ithSperm].snpIndexLookUp[snpIndex] = scSpermSeq[ithSperm].viNodeseq.len
     scSpermSeq[ithSperm].spermSnpIndexLookUp[scSpermSeq[ithSperm].viNodeseq.len] = snpIndex
@@ -65,7 +65,7 @@ proc addViNodeIthSperm*(scSpermSeq: var SeqSpermViNodes, cAlt: int, cRef: int, i
   return 0
 proc addViNode*(barcodeTable: TableRef, 
                alleleCountTable: Table[string,allele_expr],
-               scSpermSeq: var SeqSpermViNodes,
+               scSpermSeq: TableRef[int,SpermViNodes],
                outFileTotalCountMtx: var FileStream,
                outFileAltCountMtx: var FileStream,
                nnsize: var int,
@@ -80,13 +80,13 @@ proc addViNode*(barcodeTable: TableRef,
   for bc, ac in alleleCountTable.pairs:
     # ac.tostring(acs)
     ## mindp, maxdp, they are values per cell
-    if (ac.cref+ac.calt) <= mindp or (ac.cref+ac.calt) >= maxdp: continue        
+    if (ac.cref+ac.calt) < mindp or (ac.cref+ac.calt) > maxdp: continue        
     var ithSperm = barcodeTable[bc]
     ## write to mtx Ref count
     outFileTotalCountMtx.writeLine($snpIndex & " " & $(ithSperm+1) & " " & $(ac.cRef+ac.cAlt))
     outFileAltCountMtx.writeLine($snpIndex & " " & $(ithSperm+1) & " " & $ac.cAlt)
     nnsize += 1
     var emissionArray = getEmission(thetaRef=thetaRef,thetaAlt=thetaAlt,cRef=ac.cRef,cAlt=ac.cAlt)
-    discard addViNodeIthSperm(scSpermSeq = scSpermSeq, cAlt = int(ac.calt), cRef = int(ac.cref), ithSperm = ithSperm, emissionArray =emissionArray, snpIndex =snpIndex,initProb = initProb,rec_pos =rec_pos,cmPmb = cmPmb)
+    discard addViNodeIthSperm(scSpermSeq = scSpermSeq, cAlt = ac.calt, cRef = ac.cref, ithSperm = ithSperm, emissionArray =emissionArray, snpIndex =snpIndex,initProb = initProb,rec_pos =rec_pos,cmPmb = cmPmb)
   return 0
 # The size line  
diff --git a/src/sgcocaller/inferMissingSnps.nim b/src/sgcocaller/inferMissingSnps.nim
index 676978ce2bbbcfd1462a30839b0a68f6137315ec..95bf50698435e4d0b6a3330a9e5b1265d1839641 100755
--- a/src/sgcocaller/inferMissingSnps.nim
+++ b/src/sgcocaller/inferMissingSnps.nim
@@ -37,11 +37,11 @@ proc inferGeno(type00:int, type10:int, error_rate = 0.1,posterior_thresh: float)
   let prob_1_p = 1 - prob_0_p 
   if max(prob_0_p,prob_1_p) >= posterior_thresh:
 #    echo "posterior_prob: " & $(max(prob_0_p,prob_1_p))
-    if prob_0_p > prob_1_p:
-#      echo "returned inferred geno: gREF"
+    if prob_0_p > prob_1_p: 
+      # "returned inferred geno: gREF"
       return gREF
     else:
-#      echo "returned inferred geno: gALT"
+      #"returned inferred geno: gALT"
       return gALT
   return gUnknown
 # return full sequence of template geno by inferring missing SNPs's genotypes 
@@ -75,7 +75,7 @@ proc inferSnpGeno*(templateGeno: seq[BinaryGeno], gtMtx:seq[seq[BinaryGeno]], po
         offset += 1
         totalIter += 1
       if coexisPos.len < int(nAnchorSnps/2):
-        if debug: echo "not enough coexisting SNPs for jth cell: " & $j 
+#        if debug: echo "not enough coexisting SNPs for jth cell: " & $j 
         continue
       snpLD = matchTemplateHap(cell_geno = map(coexisPos,proc(x:int): BinaryGeno =  gtMtx[j][x]), 
                                temp_geno = map(coexisPos,proc(x:int): BinaryGeno =  templateGeno[x]) )       
diff --git a/src/sgcocaller/utils.nim b/src/sgcocaller/utils.nim
index 01370af097ed3a68c0fa66b81d510d2183e5999c..9935127f27f732f768cfb897c54d3fd84225b7f9 100755
--- a/src/sgcocaller/utils.nim
+++ b/src/sgcocaller/utils.nim
@@ -9,16 +9,16 @@ import sequtils
 
 type 
   allele_expr* = ref object
-    cref*: int
-    calt*: int
+    cref*: int16
+    calt*: int16
   ViState* = enum
     stateRef, stateAlt, stateN
   BinaryGeno* = enum
     gUnknown, gREF, gALT
   ViNode* = object
     pos*: int
-    cRef*: int
-    cAlt*: int
+    cRef*: int16
+    cAlt*: int16
     pathScore*: array[stateRef..stateAlt, float]
     pathState*: array[stateRef..stateAlt, ViState]      
     state*: ViState  
@@ -37,7 +37,7 @@ type
   MtxRow* = tuple
     rowIndex: int
     colIndex: int
-    value: int
+    value: int16
 type 
   Mtx* = ref object
     header*: string
@@ -70,10 +70,10 @@ proc get_base_offset*(position:int, align: Record): int =
     # get the base 
     base_offset = qoff - over-1
     break 
-  return base_offset   
+  return base_offset    
 
 
-proc toBinaryGeno*(x: int, format = "12") : BinaryGeno = 
+proc toBinaryGeno*(x: int8, format = "12") : BinaryGeno = 
   if(format == "01"):
     if x == 1: return gALT
     if x == 0: return gREF
@@ -88,15 +88,18 @@ proc toBinaryGeno*(x: int, format = "12") : BinaryGeno =
 proc readGtMtxToSeq*(mtxFileStream:FileStream, gtMtx:var seq[seq[BinaryGeno]], by_cell = false):int = 
   var currentEntrySeq:seq[int]
   var currentLine:seq[string]
-
-  while not mtxFileStream.atEnd():
-    currentLine = mtxFileStream.readLine().splitWhitespace()
-    ## i j 1-based from file
-    currentEntrySeq = map(currentLine, proc(x: string): int = parseInt(x))
-    if by_cell:
-      gtMtx[(currentEntrySeq[0]-1)][(currentEntrySeq[1]-1)] = currentEntrySeq[2].toBinaryGeno
-    else:
-      gtMtx[(currentEntrySeq[1]-1)][(currentEntrySeq[0]-1)] = currentEntrySeq[2].toBinaryGeno
+  if by_cell:
+    while not mtxFileStream.atEnd():
+      currentLine = mtxFileStream.readLine().splitWhitespace()
+      ## i j 1-based from file
+      currentEntrySeq = map(currentLine, proc(x: string): int = parseInt(x))
+      gtMtx[(currentEntrySeq[0]-1)][(currentEntrySeq[1]-1)] = int8(currentEntrySeq[2]).toBinaryGeno
+  else:
+    while not mtxFileStream.atEnd():
+      currentLine = mtxFileStream.readLine().splitWhitespace()
+      ## i j 1-based from file
+      currentEntrySeq = map(currentLine, proc(x: string): int = parseInt(x))
+      gtMtx[(currentEntrySeq[1]-1)][(currentEntrySeq[0]-1)] = int8(currentEntrySeq[2]).toBinaryGeno
   return 0
 
 proc add_allele*(g:GtNode, alleleBinGeno:int):string = g.alleles & $alleleBinGeno
@@ -105,7 +108,7 @@ proc inc_count_cref*(a:allele_expr) = inc(a.cref)
 proc inc_count_calt*(a:allele_expr) = inc(a.calt)
 
 proc getEmission*(thetaRef=0.1,thetaAlt=0.9,
-                  cRef:int,cAlt:int): array[stateRef..stateAlt, float] =
+                  cRef:int16,cAlt:int16): array[stateRef..stateAlt, float] =
   
   var emissionScore = [dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaRef,log=true),
                         dbinom(x=float(cAlt),size=(cRef+cAlt),prob=thetaAlt,log=true)]
@@ -182,7 +185,7 @@ proc readMtx*(mtx_file:string): Mtx =
     current_line = mtxFileStream.readLine()
     sparseMtx.lines.add((rowIndex: parseInt(current_line.splitWhitespace()[0]),
                          colIndex: parseInt(current_line.splitWhitespace()[1]),
-                         value: parseInt(current_line.splitWhitespace()[2]) ))
+                         value: int16(parseInt(current_line.splitWhitespace()[2])) ))
   mtxFileStream.close()
 
   return sparseMtx