# Path of the system word list; one entry per line.
WORDFILE = '/usr/share/dict/words'

# Load every non-blank line with surrounding whitespace removed.  The file is
# opened in a `with` block so the handle is closed as soon as the list is
# built, rather than whenever the interpreter happens to collect it.
with open(WORDFILE) as wordfile:
    stripped = (line.strip() for line in wordfile)
    words = [entry for entry in stripped if entry]

#--------------------------------------------------------------------------
# Challenge A: There is a word in the list that has 7 occurrences of character 'i'. What is it?
#
# Answer: infinito-infinitesimal

ans = ''
for w in words:
    if w.count('i') == 7:
        ans = w
print
print 'Word with 7 occurrences of i is:', ans


#--------------------------------------------------------------------------
# Challenge B:  Find how many hyphenated entries in the list have the form "blank-to-blank" (the same word on both sides)
#
# Answer: 23

examples = []
for w in words:
    pieces = w.split('-')
    if len(pieces) == 3 and pieces[1] == 'to' and pieces[0] == pieces[2]:
        examples.append(w)
print
print 'There are', len(examples), 'phrases of the form "blank-to-blank" for some blank'
print 'These are: ', ', '.join(examples)

#--------------------------------------------------------------------------
# Challenge C: What is the longest word in the file, and what is its length?
#       (in case of tie, report any longest word, although it turns
#        out to be unique for our data set)
#
# Answer: pneumonoultramicroscopicsilicovolcanoconiosis (length 45)

longest = ''
for w in words:
    if len(w) > len(longest):
        longest = w
print
print 'longest word, having length', len(longest), 'is', longest

#--------------------------------------------------------------------------
# Challenge D:  If we consider only alphabetic characters in the wordlist, of either case,
#   what percentage of them are vowels (AEIOU)? We would like to know to the nearest hundredth of a percent.
#
# Answer: 38.84%   

totalAlpha = 0
totalVowels = 0
for w in words:
    for c in w.lower():
        if c.isalpha():
            totalAlpha += 1
            if c in 'aeiou':
                totalVowels += 1

print
print 'Percentage of vowels among letters is', 100.0 * totalVowels / totalAlpha

#--------------------------------------------------------------------------
# Challenge E:  classic spelling rule is "i before e except after c, ..."
# We will look for those exceptions to the typical rule, ignoring case. How many such exceptions exist in our data set?
#
# Answer: 5375  (but 5377 if not considering words of form "Cei...")

special = []
for w in words:
    bad = False
    for j in range(len(w)-1):
        if w[j]=='e' and w[j+1]=='i' and (j==0 or w[j-1] not in 'cC'):
            bad = True
    if bad:
        special.append(w)

    # Note difference between above logic and the less accurate test
    #   'ei' in w and 'cei' not in w and 'Cei' not in w
    # if word such as "ceiling-weight" had been in dictinoary
    if bad != ('ei' in w and 'cei' not in w and 'Cei' not in w):
        print 'WARNING: careful with', w

print
print "There are", len(special), "words that are exceptions to the 'i before e' rule"

#--------------------------------------------------------------------------
# Challenge F: How many words appear in both proper and improper form?
#
# Answer: 14403

proper = []
for k in range(1,len(words)):
    if words[k-1] == words[k].capitalize():
        proper.append(words[k-1])

print
print 'There are', len(proper), 'examples of words that are proper and improper'


#--------------------------------------------------------------------------
# Challenge G:  Which entries have the greatest number of pairs of consecutive matching letters?
#        For example, 'coffee' has two such pairs.
#
# The most such pairs is 4. Those entries are:
#  ['killeekillee', 'possessionlessness', 'soogee-moogee', 'subbookkeeper', 'successlessness', 'too-soonness']

ans = []
count = 0

for w in words:
    temp = 0
    for j in range(1,len(w)):
        if w[j-1] == w[j] and (j==1 or w[j-2] != w[j-1]):
            temp += 1
    if temp > count:
        count = temp
        ans = [w]
    elif temp == count:
        count = temp
        ans.append(w)
        

print
print 'An entry with', count, 'repeated pairs is', ans

#--------------------------------------------------------------------------
# Challenge H: How many words have pattern 'abcde' as subsequence?
#
# Answer: 25

pattern = 'abcde'

soln = []
for w in words:
    j = 0
    for c in pattern:
        if j != -1:
            j = w.find(c, j)  # find the next occurrence of next character of the pattern

    if j != -1:
        soln.append(w)

print
print 'There are', len(soln), 'words with', pattern, 'as a subsequence'
print soln

#--------------------------------------------------------------------------
# Challenge I: How many american/british pairs can we find ending respectively er/re
#    (as in theater/theatre)
#
# Answer: 207

american = []
british = []
goal = ''
for w in words:
    if w.islower() and len(w) > 2:
        if w.endswith('er'):
            american.append(w)
        if w.endswith('re'):
            a = w[:-2] + 'er'
            if a in american:    # might be slow to check
                british.append(w)

print
print 'There appear to be', len(british), 'pairs of American/British words with er vs re ending'
#print british