# Word-list puzzles (Challenges A-I) answered against the system dictionary.
#
# Every challenge is a small pure function taking the list of entries, so it
# can be exercised without the dictionary file; main() loads the file and
# prints the report.  The print calls pass a single pre-formatted string so
# the script produces the same output under Python 2 and Python 3.

WORDFILE = '/usr/share/dict/words'

VOWELS = 'aeiou'


def load_words(path=WORDFILE):
    """Return the non-blank lines of *path*, stripped of surrounding whitespace.

    The file is closed as soon as it has been read.  Raises whatever
    open() raises (IOError/OSError) if the file cannot be opened.
    """
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [entry for entry in stripped if entry]


#--------------------------------------------------------------------------
# Challenge A: There is a word in the list that has 7 occurrences of character 'i'. What is it?
#
# Answer: infinito-infinitesimal
def word_with_char_count(words, char='i', occurrences=7):
    """Return the last entry containing *char* exactly *occurrences* times.

    Returns '' when no entry qualifies.
    """
    found = ''
    for w in words:
        if w.count(char) == occurrences:
            found = w
    return found


#--------------------------------------------------------------------------
# Challenge B: Find how many hyphenated expressions they have of the form "blank-to-blank"
#
# Answer: 23
def blank_to_blank(words):
    """Return the entries of the form "X-to-X" (identical X on both sides)."""
    matches = []
    for w in words:
        pieces = w.split('-')
        if len(pieces) == 3 and pieces[1] == 'to' and pieces[0] == pieces[2]:
            matches.append(w)
    return matches


#--------------------------------------------------------------------------
# Challenge C: What is the longest word in the file, and what is its length?
#              (in case of tie, report any longest word, although it turns
#              out to be unique for our data set)
#
# Answer: pneumonoultramicroscopicsilicovolcanoconiosis (length 45)
def longest_word(words):
    """Return the first entry of maximal length, or '' for an empty list."""
    # max() returns the first maximal element, matching a strict '>' scan.
    return max(words, key=len) if words else ''


#--------------------------------------------------------------------------
# Challenge D: If we consider only alphabetic characters in the wordlist, of either case,
#              what percentage of them are vowels (AEIOU)?  We would like to know
#              to the nearest hundredth of a percent.
#
# Answer: 38.84%
def vowel_percentage(words):
    """Return the percentage (0-100) of alphabetic characters that are vowels.

    Case is ignored.  Returns 0.0 when the entries contain no alphabetic
    characters at all, instead of dividing by zero.
    """
    total_alpha = 0
    total_vowels = 0
    for w in words:
        for c in w.lower():
            if c.isalpha():
                total_alpha += 1
                if c in VOWELS:
                    total_vowels += 1
    if not total_alpha:
        return 0.0
    return 100.0 * total_vowels / total_alpha


#--------------------------------------------------------------------------
# Challenge E: classic spelling rule is "i before e except after c, ..."
#              We will look for those exceptions to the typical rule, ignoring case.
#              How many such exceptions exist in our data set?
#
# Answer: 5375  (but 5377 if not considering words of form "Cei...")
def has_ei_not_after_c(word):
    """Return True if *word* has an 'ei' that is not directly preceded by 'c' or 'C'.

    Only a lowercase 'ei' is matched (an entry starting with 'Ei' does not
    count); the preceding letter is checked against both 'c' and 'C'.
    """
    start = word.find('ei')
    while start != -1:
        if start == 0 or word[start - 1] not in 'cC':
            return True
        start = word.find('ei', start + 1)
    return False


def naive_ie_test(word):
    """Return the result of the simpler, less accurate substring test.

    It differs from has_ei_not_after_c() for an entry such as
    "ceiling-weight", which has both a 'cei' and an 'ei' not after 'c'.
    """
    return 'ei' in word and 'cei' not in word and 'Cei' not in word


def ie_rule_exceptions(words):
    """Return the entries that break "i before e except after c"."""
    return [w for w in words if has_ei_not_after_c(w)]


#--------------------------------------------------------------------------
# Challenge F: How many words appear in both proper and improper form?
#
# Answer: 14403
def proper_improper_pairs(words):
    """Return each entry that equals the capitalize() of the entry right after it.

    Only adjacent entries are compared, so this depends on the list keeping
    "Word" immediately before "word" (as a sorted dictionary file does).
    """
    return [prev for prev, cur in zip(words, words[1:])
            if prev == cur.capitalize()]


#--------------------------------------------------------------------------
# Challenge G: what entry has the most number of pairs of consecutive matching letters?
#              For example, 'coffee' has two such pairs.
#
# The most such pairs is 4. Those entries are:
#  ['killeekillee', 'possessionlessness', 'soogee-moogee', 'subbookkeeper', 'successlessness', 'too-soonness']
def count_double_letters(word):
    """Count the runs of two or more identical consecutive characters in *word*.

    A longer run such as 'aaa' is counted once: a pair only counts when it
    is the start of its run.
    """
    pairs = 0
    for j in range(1, len(word)):
        if word[j - 1] == word[j] and (j == 1 or word[j - 2] != word[j - 1]):
            pairs += 1
    return pairs


def most_double_letters(words):
    """Return (best, entries): the highest pair count and every entry reaching it."""
    best = 0
    entries = []
    for w in words:
        pairs = count_double_letters(w)
        if pairs > best:
            best = pairs
            entries = [w]
        elif pairs == best:
            entries.append(w)
    return best, entries


#--------------------------------------------------------------------------
# Challenge H: How many words have pattern 'abcde' as subsequence?
#
# Answer: 25
def is_subsequence(pattern, word):
    """Return True if the characters of *pattern* occur in *word* in order.

    The characters need not be adjacent.  Matching is case-sensitive.
    """
    # 'c in remaining' consumes the iterator up to and including the match,
    # so each pattern character must be found strictly after the previous
    # one (this matters for patterns with repeated letters).
    remaining = iter(word)
    return all(c in remaining for c in pattern)


def words_with_subsequence(words, pattern='abcde'):
    """Return the entries that contain *pattern* as a subsequence."""
    return [w for w in words if is_subsequence(pattern, w)]


#--------------------------------------------------------------------------
# Challenge I: How many american/british pairs can we find ending respectively er/re
#              (as in theater/theatre)
#
# Answer: 207
def er_re_pairs(words):
    """Return the "-re" entries whose "-er" spelling is also in *words*.

    Only all-lowercase entries longer than two characters are considered.
    """
    eligible = [w for w in words if w.islower() and len(w) > 2]
    # A set gives constant-time lookups and makes the result independent
    # of where the "-er" spelling sits in the list.
    american = {w for w in eligible if w.endswith('er')}
    return [w for w in eligible
            if w.endswith('re') and w[:-2] + 'er' in american]


def main():
    """Load WORDFILE and print the answer to every challenge."""
    words = load_words(WORDFILE)

    print('')
    print('Word with 7 occurrences of i is: %s' % word_with_char_count(words, 'i', 7))

    examples = blank_to_blank(words)
    print('')
    print('There are %d phrases of the form "blank-to-blank" for some blank'
          % len(examples))
    print('These are:  %s' % ', '.join(examples))

    longest = longest_word(words)
    print('')
    print('longest word, having length %d is %s' % (len(longest), longest))

    print('')
    # Rounded to the hundredth of a percent the challenge asks for.
    print('Percentage of vowels among letters is %.2f' % vowel_percentage(words))

    # Flag every entry on which the accurate and the naive test disagree.
    for w in words:
        if has_ei_not_after_c(w) != naive_ie_test(w):
            print('WARNING: careful with %s' % w)
    special = ie_rule_exceptions(words)
    print('')
    print("There are %d words that are exceptions to the 'i before e' rule"
          % len(special))

    proper = proper_improper_pairs(words)
    print('')
    print('There are %d examples of words that are proper and improper'
          % len(proper))

    count, ans = most_double_letters(words)
    print('')
    print('An entry with %d repeated pairs is %s' % (count, ans))

    pattern = 'abcde'
    soln = words_with_subsequence(words, pattern)
    print('')
    print('There are %d words with %s as a subsequence' % (len(soln), pattern))
    print(soln)

    british = er_re_pairs(words)
    print('')
    print('There appear to be %d pairs of American/British words with er vs re ending'
          % len(british))
    # print(british)  # uncomment to list the pairs


if __name__ == '__main__':
    main()