#!/usr/bin/env python
'''
contact: shengweima@icloud.com
usage: python choose_seq.py test.fa id.txt output.fa
'''
# Always show the expected command line when the script starts.
print("usage: python choose_seq.py test.fa id.txt output.fa")
import sys
import os
from Bio import SeqIO
def read_wanted_ids(id_file):
    """Return the set of identifiers listed in *id_file*.

    The identifier of a line is its first whitespace-delimited token;
    anything after it on the same line is ignored.  Blank lines are
    skipped instead of raising ``IndexError``.
    """
    wanted = set()
    with open(id_file) as handle:
        for line in handle:
            # Splitting on whitespace also discards the trailing newline,
            # so no separate rstrip() is needed.
            fields = line.split(None, 1)
            if fields:
                wanted.add(fields[0])
    return wanted


def select_records(records, wanted, found=None):
    """Yield each record from *records* whose ``id`` is in *wanted*.

    If *found* is given (a set), the id of every yielded record is added
    to it so the caller can tell which wanted ids were actually seen.
    """
    for record in records:
        if record.id in wanted:
            if found is not None:
                found.add(record.id)
            yield record


def main(argv=None):
    """Copy the FASTA records named in an id list to a new FASTA file.

    *argv* holds ``[input_fasta, id_file, output_fasta]`` and defaults to
    ``sys.argv[1:]``.  Exits with a usage message when fewer than three
    arguments are supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit("usage: python choose_seq.py test.fa id.txt output.fa")
    input_file, id_file, output_file = (os.path.abspath(p) for p in args[:3])

    wanted = read_wanted_ids(id_file)
    print("Found %i unique identifiers in %s" % (len(wanted), id_file))

    # Records are streamed lazily from the parser straight into the writer.
    found = set()
    records = select_records(SeqIO.parse(input_file, "fasta"), wanted, found)
    count = SeqIO.write(records, output_file, "fasta")
    print("Saved %i records from %s to %s" % (count, input_file, output_file))

    # Compare id sets rather than record counts: duplicate ids in the
    # FASTA would otherwise hide ids that were never found.
    missing = len(wanted - found)
    if missing:
        print("Warning %i IDs not found in %s" % (missing, input_file))


if __name__ == "__main__":
    main()
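# --- Web page footer captured along with the script (not part of the program) ---
# Archiver | Mobile version | ScienceNet (Beijing ICP No. 07017567-12)
# GMT+8, 2024-12-23 18:53
# Powered by ScienceNet.cn
# Copyright (c) 2007- China Science Daily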