#################################################################
# Lab 04 Solution Set by Michael Goldwasser
#
# Reads a nucleotide sequence named `dna` from one of the data
# modules below and prints simple statistics about it, one
# section per lab question.
#################################################################

# uncomment ONE of the following two lines to pick your source data:
#from human import dna
from guinea_pig import dna

#################################################################
# Question 0: How often is a base followed immediately by the same base?
#             (if completely random, we'd expect this to be 25%)
#################################################################
pairs = len(dna) - 1                # number of adjacent positions (k, k+1)
count = 0
for k in range(pairs):              # N.B. stop value keeps dna[k+1] in range
    if dna[k] == dna[k+1]:
        count += 1

# With fewer than two bases there are no pairs at all; report 0
# rather than dividing by zero (or by a negative number).
percent = count/pairs * 100 if pairs > 0 else 0.0
print("percent of repeat bases: {:.3f}%".format(percent))
print()

#################################################################
# Question 1: What percentage of codons across all primary reading frames are ATG?
#             (if completely random, we'd expect 1/64)
#################################################################
windows = len(dna) - 2              # number of length-3 windows dna[k:k+3]
count = 0
for k in range(windows):            # N.B. stop value keeps the window full-length
    if dna[k:k+3] == 'ATG':
        count += 1

# Sequences shorter than three bases contain no codons.
percent = count/windows * 100 if windows > 0 else 0.0
print("percent of ATG codons: {:.3f}%".format(percent))
print()

#################################################################
# Question 2: If two consecutive nucleotides match each other,
#             how often is the next nucleotide that same nucleotide?
#################################################################
doubles = 0
triples = 0
for k in range(len(dna)-2):         # N.B. stop value keeps dna[k+2] in range
    if dna[k] == dna[k+1]:          # neighbors match
        doubles += 1
        if dna[k] == dna[k+2]:      # the third of the triple matches as well
            triples += 1

# No matching pair was found => nothing to take a percentage of.
percent = 100*triples/doubles if doubles else 0.0
print("percent of identical pairs followed by a third: {:.3f}%".format(percent))
print()

#################################################################
# Question 3: How many times does pattern CC?AT occur?
#################################################################
total = 0
for k in range(len(dna)-4):         # N.B. stop value keeps dna[k+3:k+5] full-length
    # '?' is position k+2 and may be anything, so only the two
    # flanking pieces are compared.
    if dna[k:k+2] == 'CC' and dna[k+3:k+5] == 'AT':
        total += 1

print("Pattern CC?AT occurs {} times".format(total))
print()

#################################################################
# Question 4: For motif of the form CC?AT,
#             what is percentage of times that '?' is an A?
#################################################################
motifs = 0
catbox = 0
for k in range(len(dna)-4):         # N.B. stop value keeps dna[k+3:k+5] full-length
    if dna[k:k+2] == 'CC' and dna[k+3:k+5] == 'AT':
        motifs += 1
        if dna[k+2] == 'A':         # the wildcard position holds an A
            catbox += 1

# The motif may never occur; avoid dividing by zero in that case.
percent = 100*catbox/motifs if motifs else 0.0
print("Percent of CC?AT having an A: {:.3f}%".format(percent))
print()

#################################################################
# Question 5: What are the relative percentage of bases that immediately follow a 'CCAA' pattern?
#################################################################
# One counter per base; insertion order (A, C, G, T) is also the print order.
followers = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
for k in range(len(dna)-4):         # N.B. stop value keeps dna[k+4] in range
    if dna[k:k+4] == 'CCAA':
        base = dna[k+4]
        if base in followers:       # anything other than A/C/G/T is ignored
            followers[base] += 1

total = sum(followers.values())
print("Following CCAA is")
for base in 'ACGT':
    # If CCAA is never followed by a counted base, every share is reported as 0.
    share = 100*followers[base]/total if total else 0.0
    print("{}: {:.3f}%".format(base, share))
print()