"""
Database file parsing in biopython
- Fasta
- GenBank
- SwissProt
see section 5.1 in the biopython tutorial
http://biopython.org/DIST/docs/tutorial/Tutorial.html
The SeqRecord class is described in section 4.1
"""
from Bio import SeqIO
# SeqIO.parse returns an iterator over the records in "cam.fasta";
# printing it shows the iterator object, not the sequence data.
parser = SeqIO.parse("cam.fasta", "fasta")
print(parser)
# Fetch the first record. The builtin next() works on Python 2.6+ and
# Python 3, whereas the .next() method exists only on Python 2 iterators.
record = next(parser)
# The parser is an iterator (a FastaIterator here), so we can loop through it
# with a for loop:
from Bio import SeqIO
# Walk every record in the FASTA file and report its identifier, its
# sequence, and the sequence length.
for seq_record in SeqIO.parse("cam.fasta", "fasta"):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(seq_record.id)
    print(seq_record.seq)
    print(len(seq_record.seq))
# Essentially the same code can be used to go through files in UniProt
# (Swiss-Prot) format; only the format string changes.
from Bio import SeqIO
# Same pattern as for FASTA, but with the "swiss" format string for the
# Swiss-Prot/UniProt text file "cam.txt".
parser = SeqIO.parse("cam.txt", "swiss")
print(parser)
# Fetch the first record. The builtin next() works on Python 2.6+ and
# Python 3, whereas the .next() method exists only on Python 2 iterators.
record = next(parser)
from Bio import SeqIO
# Walk every record in the Swiss-Prot file and report its identifier, its
# sequence, and the sequence length.
for seq_record in SeqIO.parse("cam.txt", "swiss"):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(seq_record.id)
    print(seq_record.seq)
    print(len(seq_record.seq))
# If we had a file in GenBank format, we would pass the string "genbank" as
# the format argument to the parse function.
from Bio import SeqIO
# Walk every record in the GenBank file and report its identifier, its
# sequence, and the sequence length.
for seq_record in SeqIO.parse("cam.gbk", "genbank"):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(seq_record.id)
    print(seq_record.seq)
    print(len(seq_record.seq))
# Note: use the 'read' function in SeqIO to read data from a sequence file that
# contains a single record.
# getting a list of records:
from Bio import SeqIO
# Two equivalent ways to collect every record of the file into a list.
# Each call to SeqIO.parse starts a fresh pass over "cam.fasta", and the
# second assignment simply replaces the first.
# 1) Using a list comprehension:
records = [record for record in SeqIO.parse("cam.fasta", "fasta")]
# 2) Using the list constructor, which consumes the iterator directly:
records = list(SeqIO.parse("cam.fasta", "fasta"))
