""" Sequences in biopython """ """ Biopython is a collection of modules for Python bioinformatics programing. Some of its capabilities that you may find useful are: - Handling sequences and alignments - Parsing a large variety of database and data formats - Interfacing with databases - Motif analysis """ from Bio.Seq import Seq # Seq is biopython's class for storing biological sequences my_seq = Seq("AGTACACTGGT") # note the difference between print my_seq # and my_seq # we didn't specify an alphabet so it's my_seq.alphabet # here's some of the functionality of a sequence object: my_seq.reverse_complement() my_seq.translate() # you can specify an alphabet when creating a sequence object: from Bio.Alphabet import IUPAC my_seq = Seq("AGTACACTGGT", IUPAC.unambiguous_dna) # let's look at the alphabet: my_seq.alphabet my_dna = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous.dna) my_rna = my_dna.transcribe() my_prot = my_rna.translate() seq = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", IUPAC.unambiguous_rna) seq.translate() seq.translate(to_stop=True) from Bio.Alphabet import IUPAC from Bio.SeqUtils import GC my_seq = Seq('GATCGATGGGCCTATATAGGATCGAAAATCGC', IUPAC.unambiguous_dna) print GC(my_seq) from Bio.Alphabet import IUPAC cam1 = Seq("MADQLTDEQISEFKEAFSLFDKDGDGCITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTIDFPEFLNLMAKKMKDTDSEEELKEAFRVFDKDQNGFISAAELRHVMTNLGEKLTDEEVEEMIREADVDGDGQINYEEFVKIMMAK", IUPAC.protein) cam1.alphabet # In many ways a sequence object behaves like a string. # You can iterate over its letters: for amino_acid in cam1 : print amino_acid, # You can index it like a string: print cam1[0] print cam1[-1] # and create slices: cam_slice = cam1[10:20] print cam_slice # note that a slice of a sequence object is a sequence object of the same type # you can add sequences like you would strings: cam1 + cam1 # what do you think would happen if we tried to add a dna and a protein sequence? # A Seq object is immutable. There is also a mutable version called MutableSeq