""" Database file parsing in biopython - Fasta - GenBank - SwissProt see section 5.1 in the biopython tutorial http://biopython.org/DIST/docs/tutorial/Tutorial.html The SeqRecord class is described in section 4.1 """ from Bio import SeqIO parser = SeqIO.parse("cam.fasta", "fasta") print parser record = parser.next() # it is an iterator we can loop thru FastaIterator using a for loop: from Bio import SeqIO for seq_record in SeqIO.parse("cam.fasta", "fasta") : print seq_record.id print seq_record.seq print len(seq_record.seq) # Essentially the same code can be used to go thru files in uniprot format from Bio import SeqIO parser = SeqIO.parse("cam.txt", "swiss") print parser record = parser.next() from Bio import SeqIO for seq_record in SeqIO.parse("cam.txt", "swiss") : print seq_record.id print seq_record.seq print len(seq_record.seq) # If we had a file in genbank format we give the string "genbank" as # an argument to the parse function. from Bio import SeqIO for seq_record in SeqIO.parse("cam.gbk", "genbank") : print seq_record.id print seq_record.seq print len(seq_record.seq) # Note: use the 'read' function in SeqIO to read data from a sequence file that # contains a single record. # getting a list of records: from Bio import SeqIO # using list comprehensions records = [record for record in SeqIO.parse("cam.fasta", "fasta")] # or using the list constructor: records = list(SeqIO.parse("cam.fasta", "fasta"))