Extract the reads for selected IDs from a parent SFF file into a child SFF file (version 2)

I thank Peter Cock, Brad Chapman, and Arnold Daniels for the development of the second version.

#!/usr/bin/python

from Bio import SeqIO
import os, sys

#Script to extract sequences into a child sff from a parent sff
#Author: Mariam Rizkallah - August 16, 2011
#argv[1] -> parent sff
#argv[2] -> FASTA file (.fasta/.fna) or plain-text file containing the list of IDs (one per line)
#argv[3] -> child sff
#Example: ./selectFromSff.py parent.sff new_fasta.fasta child.sff (p.s. chmod +x)

#./selectFromSFF_args.py G5UAOIB07.sff seq.fna short_new.sff
def load_ids(ids_path):
    """Return the set of read IDs listed in *ids_path*.

    Files whose extension is ``.fasta`` or ``.fna`` are parsed as FASTA and
    the record IDs are collected. Any other file is read as plain text with
    one ID per line; trailing whitespace is removed and blank lines are
    skipped.

    A set is returned in both cases so that the per-record membership test
    done while scanning the parent SFF is a constant-time lookup.
    """
    ext = os.path.splitext(ids_path)[1]  # Ref: http://www.jasny.net/articles/how-to-get-a-file-extension/
    if ext in (".fasta", ".fna"):
        # Ref: Peter Cock http://bit.ly/ngqPW7
        return set(r.id for r in SeqIO.parse(ids_path, "fasta"))
    # Ref: http://permalink.gmane.org/gmane.comp.python.bio.general/5175
    # The context manager closes the ID file even if reading fails.
    with open(ids_path, "r") as handle:
        stripped = (line.rstrip() for line in handle)  # remove \n
        return set(name for name in stripped if name)


def main(argv):
    """Copy the selected reads from a parent SFF file into a child SFF file.

    argv[1] -> parent sff
    argv[2] -> FASTA or plain-text file listing the IDs to keep
    argv[3] -> child sff (output)

    Returns the process exit status: 0 on success, 1 when arguments are
    missing (a usage message is written to stderr in that case).
    """
    if len(argv) < 4:
        sys.stderr.write(
            "Usage: %s parent.sff ids.(fasta|fna|txt) child.sff\n" % argv[0]
        )
        return 1

    parent_sff, ids_path, child_sff = argv[1:4]
    wanted_ids = load_ids(ids_path)

    # Ref: http://biopython.org/SRC/biopython/Bio/SeqIO/SffIO.py
    # Generator: records are streamed from parent to child one at a time
    # rather than being held in memory.
    records = (
        record
        for record in SeqIO.parse(parent_sff, "sff")
        if record.id in wanted_ids
    )
    count = SeqIO.write(records, child_sff, "sff")
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Selected %i records" % count)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Advertisements