abi2fasta.py

Hello, BioPython World!
The fact that BioPython 1.58 now supports abi format encouraged me to work on abi2fasta.py. I made 3 trials, one with SeqIO.convert(), but the problem is that it creates separate files for each abi, and I needed them in one fasta. Another way was to append on a single fasta. A third way is to parse the abi, get the info, then convert, but I was not sure whether it will return  a SeqRecord object or not. I tried SeqIO.to_dict() which is cool, but leaves you to deal with dicts. enumerate(SeqIO.parse()) is cool, however, I liked the to_dict() more! Yes! Then, I found that SeqIO.parse() already returns a SeqRecord object 🙂
#!/usr/bin/python
import sys
import traceback #Dv
import os
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqRecord import SeqRecord
__author__=”mariamrizkallah”
__date__ =”$Jan 18, 2012 4:29:17 PM$”
try:
    print “Welcome to abi2fasta.py”
    count = 0
    path = sys.argv[1]
    output_dir = sys.argv[2]
    output_file = sys.argv[3]
    handle_fasta = open(output_file+”.fasta”, “a”)
    handle_fastq = open(output_file+”.fastq”, “a”)
    handle_qual = open(output_file+”.qual”, “a”)
    ls = os.listdir(path)
    for file in ls:
        ext = os.path.splitext(file)[1]
        rt = os.path.splitext(file)[0]
        if ext == “.ab1”:
            for seq_record in SeqIO.parse(path+file, “abi”):
                seq_record.id = path.strip(“/”)+’_’+seq_record.id+’_’+seq_record.name
                #print seq_record.id, seq_record.name, seq_record.seq
                #record = SeqRecord(seq=seq_record.seq, id=path.strip(“/”)+’_’+seq_record.id+’_’+seq_record.name, description=””)
                handle_fasta.write(seq_record.format(“fasta”))
                handle_fastq.write(seq_record.format(“fastq”))
                handle_qual.write(seq_record.format(“qual”))
            #Generator returns a dict {‘MySeq’: SeqRecord(seq=Seq(‘AATTGGCC’, IUPACUnambiguousDNA()), id=’MySeq’, name=’abi_name’, description=”, dbxrefs=[])}
            #record_dict = SeqIO.to_dict(SeqIO.parse(path+file, “abi”))
            #seq_record = str(record_dict.values()).strip(‘[]’)
            #for index, record in enumerate(SeqIO.parse(path+file, “abi”)):
                #print “ID = %s, length %i, with %i features” % (record.id, len(record.seq), len(record.features))
            #append to one file
            #SeqIO.convert(path+file, “abi”, handle_fasta, “fasta”)
            #SeqIO.convert(path+file, “abi”, handle_fastq, “fastq”)
            #SeqIO.convert(path+file, “abi”, handle_qual, “qual”)
            #separate files
            #SeqIO.convert(path+file, “abi”, output_dir+rt+”.fastq”, “fastq”)
            #SeqIO.convert(path+file, “abi”, output_dir+rt+”.fasta”, “fasta”)
            count += 1
    print “Converted %i records” % count
except traceback, t:
    print “[Usage]: ./abi2fasta abi_dir output_dir output_file”
    print print_tb(t)

 

Advertisements