1.
Convert this string
123-456-789
into this string
321:654:987
In [1]:
# Input string for exercise 1: three dash-separated groups of digits.
s = '123-456-789'
Method 1
In [2]:
# Split on dashes, reverse the characters of each group, and glue the
# groups back together with colons.
':'.join(''.join(reversed(group)) for group in s.split('-'))
Out[2]:
'321:654:987'
Method 2
In [3]:
# Character-for-character substitution: each character of the first string
# is replaced by the character at the same position in the second string.
# This is a plain lookup, not a reversal, so it only produces the desired
# result for this particular input.
s.translate(
    {ord(src): dst for src, dst in zip('123456789-', '321654987:')}
)
Out[3]:
'321:654:987'
2.
Read the DNA sequence in the file dna.fasta and find the longest run
of a single nucleotide. For example, the longest run in GATTACA is T
with a length of 2. Ignore lines beginning with >.
In [4]:
# Read the FASTA file, dropping header lines (those starting with '>') and
# stripping surrounding whitespace from every remaining line.
with open('dna.fasta') as fasta:
    lines = [row.strip() for row in fasta if not row.startswith('>')]

# Concatenate the sequence lines into one continuous string.
dna = ''.join(lines)
In [5]:
# Display the concatenated sequence built from the file.
dna
Out[5]:
'ATGAATAAAATATACTACATAATCTTTTTAAGCGCCCAGTGCCTTGTGCACATTGGGAAGTGCGGGCGAAACCATAAGCCGAGCAGGCTGACCCGTAGCGCCAACAACGTTCTACTGGAAAAGGGGCCTACCGTTGAGAGAAGCACACGAATGAGTAACCCCTGGAAAGCGTTCATGGAAAAATACGACATCGAAAGAACACACAGTTCTGGGGTTCGAGTGGATTTAGGGGAAGATGCAGAAGTGGAAAATGCAAAGTACAGAATTCCAGCTGGAAGATGTCCTGTTTTTGGAAAGGGTATCGTCATAGAGAATTCCGCTGTTAGCTTCTTAACCCCTGTGGCTACAGGAGATCAGAGGCTGAAGGATGGAGGTTTCGCCTTCCCCAAAGCGGATGACCATATCTCCCCCATGACATTAGCGAACCTTAAGGAAAGGTATAAAGACAATGTAGAGATGATGAAGTTAAACGATATAGCTTTGTGCAGAACCCACGCAGCTAGCTTTGTCATGGCAGGGGATCAAAATTCGTCCTACAGACACCCAGCTGTATACGACGAAAAGGAAAAAACATGCCACATGTTGTATTTATCAGCGCAGGAAAATATGGGTCCGAGGTACTGCAGCTCAGATGCACAAAATAGAGATGCCGTGTTCTGCTTCAAGCCAGATAAAAATGTAGATTTTGAAAACCTGGTGTATTTGAGCAAAAATGTGCGTAATGATTGGGATAAAAAATGTCCCCGTAAAAATTTAGGAAACGCCAAGTTCGGATTATGGGTGGATGGGAACTGCGAAGAAATTCCATACGTTAAAGAAGTGGAGGCAAAGGATCTGCGCGAATGCAACCGAATCGTTTTCGAAGCGAGTGCCTCAGATCAACCAACTCAGTATGAAGAAGAAATGACGGATTATCAAAAAATACAACAAGGGTTTAGACAAAACAACCGAGAGATGATTAAAAGTGCCTTTCTTCCAGTGGGTGCATTCAACTCGGATAATTTCAAAAGTAAAGGAAGAGGATTTAACTGGGCAAATTTCGATTCTGTAAAAAAGAAGTGTTACATTTTTAATACCAAACCGACTTGCCTCATTAATGACAAAAATTTTATTGCAACAACGGCGTTATCTCACCCACAAGAAGTAGACCGGGATTTCCCCTGCAGCATATATAAAGACGAAATTGAAAGAGAAATTAGGAAACAATCGAGGAACATGAATCTGTACAGTGTTGATGGGGAACGCATTGTCCTGCCGAGGATATTTATCTCCAACGATAAGGAGAGTATCAAATGTCCCTGCGAACCTGAGCACATTTCCAACAGTACCTGCAACTTTTACGTTTGTAACTGTGTAGAGAAAAGGGCGGAAATTAAGGAAAATAACCAAGTTGTTATAAAGGAAGAATTTAGGGATTATTACGAAAATGGGGAGGAAAAATCGAACAAGCAGATGCTACTAATCATTATCGGAATAACTGGTGGCGTGTGCGTCGTCGCGCTGGCCTCTATGGCCTACTTCAAGAAGAAGGCTAACAATGATAAGTATGACAAGATGGACCAGGCAGAGGGGTACGGGAAGCCCACCACCAGGAAGGACGAGATGCTCGACCCCGAGGCCTCCTTCTGGGGCGAGGACAAGCGGGCCTCCCACACCACGCCCGTGCTGATGGAGAAGCCGTACTACTGA'
Method 1
Using a finite state machine with two states (INCR and RESET)
In [6]:
# Longest run of a single character in `dna`, found with a two-state
# machine (INCR / RESET).
#
# Results:
#   best_nuc -- character of the longest run (None if `dna` is empty)
#   best     -- length of that run (0 if `dna` is empty)
#
# The best-so-far is compared after *every* transition, so the final run is
# taken into account even though no RESET follows it. Ties keep the earliest
# run because the comparison is strict.
best = 0
best_nuc = None
nuc = None   # character of the run currently being counted
count = 0    # length of the current run
for x in dna:
    if x == nuc:
        # INCR: same character, extend the current run
        count += 1
    else:
        # RESET: a different character starts a new run of length 1
        nuc = x
        count = 1
    if count > best:
        best = count
        best_nuc = nuc
In [7]:
# Display the character and length recorded for the longest run.
best_nuc, best
Out[7]:
('A', 6)
Method 2
Using regular expressions
In [8]:
import re
In [9]:
# Each alternative greedily matches a maximal stretch of one letter, so the
# matches are the consecutive runs of A, T, C and G in the sequence.
runs = [match.group(1) for match in re.finditer('(A+|T+|C+|G+)', dna)]
In [10]:
# Pick the longest run; when several share the maximum length, max() returns
# the first one encountered.
max(runs, key=len)
Out[10]:
'AAAAAA'
In [ ]: