"""Strings. Material from Chapter 7 http://openbookproject.net/thinkcs/python/english2e/ch07.html """ # The bracket operator selects a single character from a string: fruit = "banana" letter = fruit[1] print letter # The expression fruit[1] selects the character with index 1 from fruit. # When we display letter, we get a surprise: # a # The letter with index 1 is not the first letter! # Computer scientists always start counting from zero. # A nice explanation is given at: # http://www.cs.utexas.edu/~EWD/ewd08xx/EWD831.PDF # (the writer is Edsger W. Dijkstra, one of the founding fathers # of computer science) # The len function returns the number of characters in a string: fruit = "banana" print len(fruit) # Traversing the letters of a string can be done in two ways: fruit = "banana" for i in range(len(fruit)) : print fruit[i] # or fruit = "banana" for character in fruit : print character # we can extract a substring of a string using the bracket operator: s = "python java perl" print s[0:6] print s[7:11] print s[12:len(s)] # these are called slices # some shortcuts: print s[:6] # print a prefix of the string print s[12:] # print a suffix of the string # what do you think s[:] would give? print s[:] def extract_every_2nd(s) : result = '' # the empty string for i in range(0, len(s), 2) : result += s[i] return result # It is tempting to use the bracket operator in order to modify # a string. greeting = "Hello, world!" #greeting[0] = 'J' # ERROR! # strings are immutable, i.e. cannot be changed once created. # you will have to create a new string with the desired modification: greeting = "Hello, world!" 
greeting = "Hello, world!"  # defined again so this example is self-contained
new_greeting = 'J' + greeting[1:]
print(new_greeting)

# The in operator

# The in operator tests if one string is a substring of another:

'p' in 'apple'    # True
'i' in 'apple'    # False
'ap' in 'apple'   # True

# Note that a string is a substring of itself:

'apple' in 'apple'   # True


def remove_vowels(s):
    """Return a copy of s with every vowel (either case) removed."""
    vowels = "aeiouAEIOU"
    s_without_vowels = ""
    for letter in s:
        if letter not in vowels:
            s_without_vowels += letter
    return s_without_vowels


# the in operator only determines if a string is a substring of
# another one.  here's a function that also provides the index
# of the first occurrence:
# (the book gives a version with a while loop, and it only finds
# the location of a single character)

def find(s, substr):
    """Return the index of the first occurrence of substr in s, or -1."""
    for index in range(len(s)):
        if s[index:index + len(substr)] == substr:
            return index
    return -1


# here's a version that searches in s[start:end]
# we use default arguments to provide start and end
# as optional parameters
# (this definition replaces the two-argument find above)

def find(s, substr, start=0, end=None):
    """Return the lowest index in s[start:end] where substr begins, or -1.

    s      -- the string to search
    substr -- the string to look for
    start  -- index at which the search begins (default 0)
    end    -- index at which the search stops; None means len(s)

    start and end are expected to be non-negative.  A match has to lie
    entirely inside s[start:end].
    """
    if end is None:
        end = len(s)
    end = min(end, len(s))
    # the last position at which substr still fits before end; stopping
    # here keeps the comparison window from running past end
    last_start = end - len(substr)
    for index in range(start, last_start + 1):
        if s[index:index + len(substr)] == substr:
            return index
    return -1


# Exercise: write a function that behaves like the
# range function, except that instead of returning a
# list it prints the numbers in the range.

# the string module contains useful constants and helpers for manipulating
# strings, and every string carries methods of its own.
# among other things, they include our find function!
# (Python 2 also offered it as string.find; that function was removed in
# Python 3, so we look at the str.find method, which exists in both.)

import string

help(str.find)
print(str.find.__doc__)

# let's write a function that determines if a character is lower case.
# here are three ways of doing so:
# (each definition of is_lower replaces the previous one, so only the
# last one stays bound to the name)

import string  # provides ascii_lowercase and whitespace used below


def is_lower(ch):
    """Return True if ch is found in the lowercase ASCII letters."""
    return string.ascii_lowercase.find(ch) != -1

# notice the use of string.ascii_lowercase


# using the in operator:
def is_lower(ch):
    """Return True if ch occurs in the lowercase ASCII letters."""
    return ch in string.ascii_lowercase


# using the comparison operator:
def is_lower(ch):
    """Return True if ch compares between 'a' and 'z' inclusive."""
    return 'a' <= ch <= 'z'


def is_lowercase(s):
    """Determine if a string is all lowercase characters.

    Returns True for the empty string, since it has no character that
    fails the check.
    """
    for c in s:
        if c not in string.ascii_lowercase:
            return False
    return True


# Another constant defined in the string module may surprise you when you print it:

print(string.whitespace)

# Whitespace characters move the cursor without printing anything.