Skip to content
Snippets Groups Projects
writeVCF.nim 3.74 KiB
## write output VCF file
import hts
import streams
import strutils

let debug = true

proc readNextPhased(phasedAnnotFS:var FileStream): seq[string] = 
  var phaseRec = newSeq[string](4)
  var lineRec: string
  while not phasedAnnotFS.atEnd():
    lineRec = phasedAnnotFS.readLine()
    phaseRec = lineRec.splitWhitespace()
    if debug and phaseRec.len != 4:
      quit "len not 4 at " & lineRec & $phasedAnnotFS.atEnd()
    if phaseRec[3] == "-1": continue
    else: break
  return phaseRec

proc writePhaseToVCF*(ivcfFile:string, ovcfFile: string, phasedAnnotFile: string, threads:int, add_header_string = """##sgcocaller_v0.1=phase""", chrom = "nil"):int = 

  if debug: echo "ivcffile " & ivcfFile
  if debug: echo "ovcfFile " & ovcfFile
  if debug: echo "phasedAnnotFile " & phasedAnnotFile
  
  var ivcf: VCF
  var ovcf: VCF
  var v_off = 0
  var phasedAnnotFS: FileStream
  var phaseRec = newSeq[string](4)
  var phasePos: int
  var phasePhase: int
  if not open(ivcf, ivcfFile, threads=threads):
      quit "couldn't open input vcf file"
  if not open(ovcf, ovcfFile, threads=threads, mode ="w"):
    quit "couldn't open output vcf file"
  try: 
    phasedAnnotFS = openFileStream(phasedAnnotFile, fmRead)
  except:
    stderr.write getCurrentExceptionMsg()
  ovcf.copy_header(ivcf.header)
  discard ovcf.header.add_string(add_header_string)
  discard ovcf.write_header()
  discard phasedAnnotFS.readLine()
  var gt_string:seq[int32]
  phaseRec = readNextPhased(phasedAnnotFS)
  if debug: echo phasedAnnotFS.atEnd()
  phasePos = parseInt(phaseRec[0])
  phasePhase = parseInt(phaseRec[3])
  var queryChrom = chrom
  if queryChrom == "nil":
    queryChrom = "*"
  if debug: echo "wrote headers, and now starting write phased VCF"
  for v in ivcf.query(queryChrom):
    if v.POS.cint != phasePos: continue
    else:
      var f = v.format()
#      if debug: echo "wrote v in ivcf and set GT " & $v.POS.cint
      if phasePhase == 0:
        ## 0|1
        #if debug: echo "block hap0 and this h0 is 0 0|1"
        gt_string = @[int32(2),int32(5)]
      elif phasePhase == 1:
        ## 1|0
        #if debug: echo "block hap0 and this h0 is 1 1|0"
        gt_string = @[int32(4),int32(3)]
      else:
        if phasedAnnotFS.atEnd(): break 
      if f.set("GT",gt_string) != Status.OK:
        quit "set GT failed"
      if not ovcf.write_variant(v) :
        quit "write vcf failed for " & $voff
      if phasedAnnotFS.atEnd(): break 
      phaseRec = readNextPhased(phasedAnnotFS)
      if debug:
        if phaseRec.len != 4: 
          echo " phaseRec.len " &  $phaseRec.len
          echo " phaseRec" & $phaseRec
      phasePos = parseInt(phaseRec[0])
      phasePhase = parseInt(phaseRec[3])
  phasedAnnotFS.close()
  ovcf.close()
  ivcf.close()
  
  return 0

# let s_Chrs = map(toSeq(1..19), proc(x:int):string =  "chr" & $x)
# for chrom in @["chr8"]:
#   var ivcf = "data/swapped/FVB_NJ.mgp.v5.snps.dbSNP142.homo.alt.modified.swapped.GT." & chrom & ".vcf.gz"
#   var ovcf = "data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_corrected_phased_snpAnnot.vcf.gz"
#   var swphasedSnpFile = "data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_corrected_phased_snpAnnot.txt"
#   var gtMtxFile =  "data/WC_CNV_42/sgcocaller/phaseOneStep/" & chrom & "_gtMtx.mtx"
#   var phaseSnpAnnotFile = "data/WC_CNV_42/sgcocaller/phaseOneStep/" & chrom & "_phased_snpAnnot.txt"
#   var switchScoreFile ="data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_switch_score.txt" 
#   var switchedPhasedAnnotVcfFile  = "data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_corrected_phased_snpAnnot.vcf.gz"
#   discard correctPhase(gtMtxFile,phaseSnpAnnotFile,swphasedSnpFile,switchScoreFile)
#   discard writePhaseToVCF(ivcf, ovcf, swphasedSnpFile,1)