"""Simple statistics over a DNA string.

Run as a script to print the answers to Questions 0-4 for the `dna`
string imported from the dataset module.  To analyse a different
dataset, change the import in the `__main__` section at the bottom
(for example `from human import dna`).

Every "percent of ..." figure is reported as a fraction between 0 and 1.
"""


def _safe_ratio(count, total):
    """Return count / total as a float, or 0.0 when total is not positive.

    Guards against ZeroDivisionError on sequences that are too short to
    contain any window of the requested size.
    """
    if total <= 0:
        return 0.0
    return count / float(total)


def lag_match_fraction(dna, lag):
    """Return the fraction of positions j for which dna[j] == dna[j + lag].

    There are exactly len(dna) - lag positions j for which dna[j + lag]
    exists; every one of them is examined and the same number is used as
    the denominator.
    """
    positions = len(dna) - lag
    matches = sum(1 for j in range(positions) if dna[j] == dna[j + lag])
    return _safe_ratio(matches, positions)


def count_pattern(dna, pattern):
    """Return the number of (possibly overlapping) occurrences of pattern.

    Deliberately avoids the built-in str.count, which only counts
    non-overlapping matches; a window of len(pattern) characters is
    checked at every starting position instead.
    """
    windows = len(dna) - len(pattern) + 1
    # startswith(pattern, j) compares in place without building a slice.
    return sum(1 for j in range(windows) if dna.startswith(pattern, j))


def pattern_fraction(dna, pattern):
    """Return the fraction of len(pattern)-wide windows equal to pattern."""
    windows = len(dna) - len(pattern) + 1
    return _safe_ratio(count_pattern(dna, pattern), windows)


def follower_fractions(dna, base):
    """Return the relative frequency of each base that follows `base`.

    The result is a dict with keys 'A', 'C', 'G' and 'T'.  Any follower
    that is not 'A', 'C' or 'G' is counted under 'T'.  When `base` is
    never followed by anything, all four fractions are 0.0.
    """
    counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
    for j in range(len(dna) - 1):  # the last base has no follower
        if dna[j] != base:
            continue
        follower = dna[j + 1]
        counts[follower if follower in ('A', 'C', 'G') else 'T'] += 1
    total = sum(counts.values())
    return dict((key, _safe_ratio(n, total)) for key, n in counts.items())


def main(dna):
    """Print the answers to Questions 0-4 for the given DNA string."""
    # Question 0: How often is a base followed immediately by the same base?
    # (if completely random, we'd expect this to be 0.25)
    print("percent of repeat bases:")
    print(lag_match_fraction(dna, 1))
    print('')

    # Question 1: What percentage of consecutive bases are pattern 'AT'?
    # (if completely random, we'd expect 1/16 pairs)
    print("percent of AT pairs:")
    print(pattern_fraction(dna, 'AT'))
    print('')

    # Question 2: What are the relative percentages of the bases that
    # immediately follow an 'A'?
    after_a = follower_fractions(dna, 'A')
    print("percent of bases following an A:")
    for key in ('A', 'C', 'G', 'T'):
        print(key, after_a[key])
    print('')

    # Question 3: What percentage of the time is a base the same as the
    # base that was TWO earlier?
    print("percent of base matches two apart:")
    print(lag_match_fraction(dna, 2))
    print('')

    # Question 4: How many times does the pattern CCAAT occur?
    # (determined WITHOUT using the built-in count method)
    print("number of CCAAT occurrences:")
    print(count_pattern(dna, 'CCAAT'))
    print('')


if __name__ == "__main__":
    # Alternative dataset: from human import dna
    from guinea_pig import dna

    main(dna)