## write output VCF file import hts import streams import strutils let debug = true proc readNextPhased(phasedAnnotFS:var FileStream): seq[string] = var phaseRec = newSeq[string](4) var lineRec: string while not phasedAnnotFS.atEnd(): lineRec = phasedAnnotFS.readLine() phaseRec = lineRec.splitWhitespace() if debug and phaseRec.len != 4: quit "len not 4 at " & lineRec & $phasedAnnotFS.atEnd() if phaseRec[3] == "-1": continue else: break return phaseRec proc writePhaseToVCF*(ivcfFile:string, ovcfFile: string, phasedAnnotFile: string, threads:int, add_header_string = """##sgcocaller_v0.1=phase""", chrom = "nil"):int = if debug: echo "ivcffile " & ivcfFile if debug: echo "ovcfFile " & ovcfFile if debug: echo "phasedAnnotFile " & phasedAnnotFile var ivcf: VCF var ovcf: VCF var v_off = 0 var phasedAnnotFS: FileStream var phaseRec = newSeq[string](4) var phasePos: int var phasePhase: int if not open(ivcf, ivcfFile, threads=threads): quit "couldn't open input vcf file" if not open(ovcf, ovcfFile, threads=threads, mode ="w"): quit "couldn't open output vcf file" try: phasedAnnotFS = openFileStream(phasedAnnotFile, fmRead) except: stderr.write getCurrentExceptionMsg() ovcf.copy_header(ivcf.header) discard ovcf.header.add_string(add_header_string) discard ovcf.write_header() discard phasedAnnotFS.readLine() var gt_string:seq[int32] phaseRec = readNextPhased(phasedAnnotFS) if debug: echo phasedAnnotFS.atEnd() phasePos = parseInt(phaseRec[0]) phasePhase = parseInt(phaseRec[3]) var queryChrom = chrom if queryChrom == "nil": queryChrom = "*" if debug: echo "wrote headers, and now starting write phased VCF" for v in ivcf.query(queryChrom): if v.POS.cint != phasePos: continue else: var f = v.format() # if debug: echo "wrote v in ivcf and set GT " & $v.POS.cint if phasePhase == 0: ## 0|1 #if debug: echo "block hap0 and this h0 is 0 0|1" gt_string = @[int32(2),int32(5)] elif phasePhase == 1: ## 1|0 #if debug: echo "block hap0 and this h0 is 1 1|0" gt_string = @[int32(4),int32(3)] else: if phasedAnnotFS.atEnd(): break if f.set("GT",gt_string) != Status.OK: quit "set GT failed" if not ovcf.write_variant(v) : quit "write vcf failed for " & $voff if phasedAnnotFS.atEnd(): break phaseRec = readNextPhased(phasedAnnotFS) if debug: if phaseRec.len != 4: echo " phaseRec.len " & $phaseRec.len echo " phaseRec" & $phaseRec phasePos = parseInt(phaseRec[0]) phasePhase = parseInt(phaseRec[3]) phasedAnnotFS.close() ovcf.close() ivcf.close() return 0 # let s_Chrs = map(toSeq(1..19), proc(x:int):string = "chr" & $x) # for chrom in @["chr8"]: # var ivcf = "data/swapped/FVB_NJ.mgp.v5.snps.dbSNP142.homo.alt.modified.swapped.GT." & chrom & ".vcf.gz" # var ovcf = "data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_corrected_phased_snpAnnot.vcf.gz" # var swphasedSnpFile = "data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_corrected_phased_snpAnnot.txt" # var gtMtxFile = "data/WC_CNV_42/sgcocaller/phaseOneStep/" & chrom & "_gtMtx.mtx" # var phaseSnpAnnotFile = "data/WC_CNV_42/sgcocaller/phaseOneStep/" & chrom & "_phased_snpAnnot.txt" # var switchScoreFile ="data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_switch_score.txt" # var switchedPhasedAnnotVcfFile = "data/WC_CNV_42/sgcocaller/swphase/" & chrom & "_corrected_phased_snpAnnot.vcf.gz" # discard correctPhase(gtMtxFile,phaseSnpAnnotFile,swphasedSnpFile,switchScoreFile) # discard writePhaseToVCF(ivcf, ovcf, swphasedSnpFile,1)