"""
Sequences in biopython
"""


"""
Biopython is a collection of Python modules for bioinformatics programming.
Some of its capabilities that you may find useful are:
- Handling sequences and alignments
- Parsing a large variety of database and data formats
- Interfacing with databases
- Motif analysis
"""



from Bio.Seq import Seq

# Seq is Biopython's class for holding a biological sequence.
my_seq = Seq("AGTACACTGGT")

# Two ways of looking at the same object.
# Printing it goes through str():
print(my_seq)
# A bare expression is only echoed (as its repr) by an interactive
# interpreter; when this file is run as a script the line shows nothing:
my_seq
# No alphabet was passed to the constructor above, so this is whatever
# default Seq assigns:
my_seq.alphabet

# A sample of what a sequence object can do:
my_seq.reverse_complement()
my_seq.translate()

# An alphabet can be supplied explicitly when the sequence object is built.

from Bio.Alphabet import IUPAC

my_seq = Seq("AGTACACTGGT", alphabet=IUPAC.unambiguous_dna)
# Inspect the alphabet that is now attached to the sequence:
my_seq.alphabet


# From DNA to RNA to protein.
# The alphabet object is the single attribute ``IUPAC.unambiguous_dna``
# (underscore, not a dotted path), matching the other Seq(...) calls in
# this file.
my_dna = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna)
my_rna = my_dna.transcribe()
my_prot = my_rna.translate()

# An RNA sequence can be translated directly.
seq = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", IUPAC.unambiguous_rna)
# Default call: translate the whole sequence.
seq.translate()
# to_stop=True: per the Biopython documentation, translation ends at the
# first in-frame stop codon.
seq.translate(to_stop=True)


from Bio.Alphabet import IUPAC
from Bio.SeqUtils import GC

# Build a DNA sequence and print what Bio.SeqUtils.GC computes for it.
my_seq = Seq(
    "GATCGATGGGCCTATATAGGATCGAAAATCGC",
    IUPAC.unambiguous_dna,
)
print(GC(my_seq))

from Bio.Alphabet import IUPAC

# A protein sequence tagged with the IUPAC protein alphabet.
# The literal is broken across lines for readability only: adjacent string
# literals are concatenated into one string by the parser.
cam1 = Seq(
    "MADQLTDEQISEFKEAFSLFDKDGDGCITTKELGTVMRSLGQNPTEAELQ"
    "DMINEVDADGNGTIDFPEFLNLMAKKMKDTDSEEELKEAFRVFDKDQNGF"
    "ISAAELRHVMTNLGEKLTDEEVEEMIREADVDGDGQINYEEFVKIMMAK",
    IUPAC.protein,
)
# Inspect the alphabet attached to the protein sequence:
cam1.alphabet

# In many ways a sequence object behaves like a string.
# You can iterate over its letters.  The letters are joined and printed
# with a single call so the line is properly terminated; a trailing-comma
# print would leave the line open and the next print would be appended to it.
print(" ".join(amino_acid for amino_acid in cam1))
# You can index it like a string:
print(cam1[0])
print(cam1[-1])

# and create slices:
cam_slice = cam1[10:20]
print(cam_slice)
# note that a slice of a sequence object is a sequence object of the same type

# you can add sequences like you would strings:
cam1 + cam1

# what do you think would happen if we tried to add a dna and a protein sequence?

# A Seq object is immutable.  There is also a mutable version called MutableSeq