Source code for seqtools.varscan

#!/usr/bin/env python
"""
parse VCF output from VarScan
and fix the ALT column
to adhere with VCF specifications
"""
import re

[docs]def fixLine(line): """Fix a varscan VCF line Prints the output to stdout. Fixes the ALT column and also fixes the FREQ field to be a floating point value, easier for filtering. :param line: a pre-split and stripped varscan line """ line = line.strip() if(line.startswith("##")): line=line.replace('FREQ,Number=1,Type=String', 'FREQ,Number=1,Type=Float') return line if(line.startswith("#CHROM")): return line line = line.split('\t') try: REF,ALT = line[3:5] except ValueError: return "\t".join(line) + "\n" Ifreq = line[8].split(":").index("FREQ") ndat = line[9].split(":") tdat = line[10].split(":") ndat[Ifreq] = str(float(ndat[Ifreq].rstrip("%"))/100) tdat[Ifreq] = str(float(tdat[Ifreq].rstrip("%"))/100) line[9]=":".join(ndat) line[10]=":".join(tdat) if "+" in ALT or "-" in ALT: if "/" not in ALT: if ALT[0] == "+": R = REF A = REF + re.sub(r'^[+-][\d]?','',ALT) elif ALT[0] == "-": R = REF + re.sub(r'^[+-][\d]?','',ALT) A = REF else: Ins = [p[1:] for p in ALT.split("/") if p[0]=="+"] Del = [p[1:] for p in ALT.split("/") if p[0]=="-"] if len(Del): REF += sorted(Del,key= lambda x: len(x))[-1] A = ",".join([REF[::-1].replace(p[::-1], "", 1)[::-1] for p in Del] + [REF+p for p in Ins]) R = REF REF = R ALT = A else: ALT = ALT.replace('/',',') line[3] = REF line[4] = ALT return "\t".join(line)
[docs]def fixVarscanVcfFile(iterable): """Takes an interator over a varscan VCF file and returns an iterator over fixed VCF lines, including header. :param iterable: any iterable of the VCF lines :returns: An iterator over fixed VCF lines Usage is like so: >>> from seqtools.varscan import fixVarscanVcfFile >>> varscan = fixVarscanVcfFile(open('filename.vcf','r')) >>> for line in varscan: print line """ for line in iterable: yield fixLine(line)
def main(): import argparse,sys parser = argparse.ArgumentParser('Parse VCF output from Varscan to output valid VCF. Output is to stdout.') parser.add_argument('-v','--varscan', help="varscan vcf output file name") opts = parser.parse_args() if(not opts.varscan): varscan = fixVarscanVcfFile(sys.stdin) else: varscan = fixVarscanVcfFile(open(opts.varscan)) for line in varscan: print(line) if __name__ == '__main__': main()