In [1]:
import numpy as np

Getting Started with Python

Live Demo of Jupyter Features

  • Administration interface
    • Files
    • Running
    • Uploading notebooks
    • New notebook
  • Notebook interface
    • Menu
    • Cells
    • Keyboard shortcuts
    • Getting help
In [ ]:

Using Markdown in Jupyter for Literate Programming

Markdown Syntax

In [ ]:

Elements of Python

Code and comments

In [2]:
# The code below makes a list of numbers
list(range(3, 10, 3))
Out[2]:
[3, 6, 9]

Types

None

In [3]:
None

Logical

In [4]:
True, False
Out[4]:
(True, False)

Numeric

In [5]:
1, 2, 3, 3.14, 2.78
Out[5]:
(1, 2, 3, 3.14, 2.78)

Strings

In [6]:
'a', 'hello', 'spaces are OK', "double quotes are OK too"
Out[6]:
('a', 'hello', 'spaces are OK', 'double quotes are OK too')
In [7]:
'''triple quoted strings
can span
multiple lines'''
Out[7]:
'triple quoted strings\ncan span\nmultiple lines'

Tab and newline characters

In [8]:
print('a b\tc\nd\te f')
a b     c
d       e f

String interpolation

Old style

In [9]:
'There are %d planets in our %s system' % (8, 'solar')
Out[9]:
'There are 8 planets in our solar system'

New style

In [10]:
'There are {} planets in our {} system'.format(3.14, 'lunar')
Out[10]:
'There are 3.14 planets in our lunar system'

Operators

Arithmetic

In [11]:
-1, 2+3, 7%3, 7/2, 7//2, 2**4
Out[11]:
(-1, 5, 1, 3.5, 3, 16)

Logical

In [12]:
True and True, True & False, True | False, 3 <= 4, 3 == 4, 3 != 4, 3 > 4
Out[12]:
(True, False, True, True, False, True, False)

Variables and Assignment

In [13]:
a = 3
b = 4
c = a + b
In [14]:
a, b, c
Out[14]:
(3, 4, 7)

Containers (Collections)

In [78]:
a_tuple = (1, 2, 3, 4)
a_list = ['a', 'b', 'c', 'd']
a_set = {1, 2, 2, 3, 3, 3}
a_dict = {'c': 1, 'b': 2, 'a': 3}
a_string = "a string is a container of characters"
In [16]:
a_tuple
Out[16]:
(1, 2, 3, 4)
In [17]:
a_list
Out[17]:
['a', 'b', 'c', 'd']
In [18]:
a_set
Out[18]:
{1, 2, 3}
In [19]:
a_dict
Out[19]:
{'a': 3, 'b': 2, 'c': 1}
In [79]:
a_string
Out[79]:
'a string is a container of characters'

Indexing a container

In [20]:
a_tuple[0]
Out[20]:
1
In [21]:
a_list[1:4]
Out[21]:
['b', 'c', 'd']
In [22]:
a_dict['b']
Out[22]:
2

Built-in functions

In [82]:
len("hello world")
Out[82]:
11
In [81]:
list(range(5, 10))
Out[81]:
[5, 6, 7, 8, 9]
In [83]:
print("hello\nworld")
hello
world

Conversion between types

In [23]:
x = 123
x, type(x)
Out[23]:
(123, int)
In [24]:
x = str(x)
x, type(x)
Out[24]:
('123', str)
In [25]:
x = float(x)
x, type(x)
Out[25]:
(123.0, float)
In [26]:
d = {'a': 1, 'b': 2}
type(d)
Out[26]:
dict
In [27]:
list(d)
Out[27]:
['a', 'b']
In [28]:
list(d.items())
Out[28]:
[('a', 1), ('b', 2)]

Controlling program flow

In [29]:
score = np.random.uniform(60, 100)

if score > 90:
    print('A')
elif score > 80:
    print('B')
else:
    print('C')
C

Looping

In [30]:
list(range(10, 20, 2))
Out[30]:
[10, 12, 14, 16, 18]
In [31]:
for i in range(10, 20, 2):
    print(i, i**2)
10 100
12 144
14 196
16 256
18 324
In [32]:
max_count = 5
count = 0
while (count < max_count):
    print(count)
    count += 1
0
1
2
3
4

Creating lists

In [33]:
[x**2 for x in range(5) if x % 2 == 0]
Out[33]:
[0, 4, 16]
In [34]:
xs = []
for x in range(5):
    xs.append(x**2)
xs
Out[34]:
[0, 1, 4, 9, 16]

User-defined functions

In [35]:
def f(x):
    """Return the argument unchanged (the identity function).

    A docstring such as this one is the place to say something
    about what the function does.
    """
    return x
In [36]:
f(3.14)
Out[36]:
3.14
In [37]:
def g(a, b):
    """Add two values together.

    a, b -- the operands; anything that supports the ``+`` operator.
    Returns the result of ``a + b``.
    """
    total = a + b
    return total
In [38]:
g(3, 4)
Out[38]:
7

Default arguments

In [39]:
def h(a=0, b=1, c=2):
    """Calculate the weighted sum ``a + 2*b + 3*c``.

    Every argument is optional and can be passed by keyword in any
    order; the defaults are ``a=0``, ``b=1`` and ``c=2``.
    """
    return a + 2 * b + 3 * c
In [40]:
h()
Out[40]:
8
In [41]:
h(a = 1, b = 2)
Out[41]:
11
In [42]:
h(c = 1, b = 2, a = 3)
Out[42]:
10

Using Libraries

In [43]:
import math

math.pi
Out[43]:
3.141592653589793
In [44]:
import numpy as np

np.linspace(0, 1, 11)
Out[44]:
array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ])
In [45]:
from numpy.random import rand
In [46]:
rand(4)
Out[46]:
array([ 0.27340751,  0.22682114,  0.51015736,  0.81841486])

Built-in functions

Many functions are automatically imported into the main namespace. That is why we can use functions such as range or list without importing them first.

In [47]:
range?
In [48]:
help(zip)
Help on class zip in module builtins:

class zip(object)
 |  zip(iter1 [,iter2 [...]]) --> zip object
 |
 |  Return a zip object whose .__next__() method returns a tuple where
 |  the i-th element comes from the i-th iterable argument.  The .__next__()
 |  method continues until the shortest iterable in the argument sequence
 |  is exhausted and then it raises StopIteration.
 |
 |  Methods defined here:
 |
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |
 |  __iter__(self, /)
 |      Implement iter(self).
 |
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |
 |  __next__(self, /)
 |      Implement next(self).
 |
 |  __reduce__(...)
 |      Return state information for pickling.

In [49]:
list(zip(['a', 'b', 'c'], range(10)))
Out[49]:
[('a', 0), ('b', 1), ('c', 2)]

Working with vectors and arrays

In [50]:
A = np.random.random((3,4))
A
Out[50]:
array([[ 0.02088975,  0.28425656,  0.96299894,  0.08568538],
       [ 0.77909358,  0.08869851,  0.46405472,  0.09380882],
       [ 0.46485234,  0.41541956,  0.81755275,  0.87000665]])

Indexing a matrix

In [51]:
A[0,0]
Out[51]:
0.020889751435612114
In [52]:
A[2,3]
Out[52]:
0.87000665416964384
In [53]:
A[1]
Out[53]:
array([ 0.77909358,  0.08869851,  0.46405472,  0.09380882])
In [54]:
A[1, :]
Out[54]:
array([ 0.77909358,  0.08869851,  0.46405472,  0.09380882])
In [55]:
A[:, 2]
Out[55]:
array([ 0.96299894,  0.46405472,  0.81755275])
In [56]:
A[:2, 1:]
Out[56]:
array([[ 0.28425656,  0.96299894,  0.08568538],
       [ 0.08869851,  0.46405472,  0.09380882]])
In [57]:
A[1:3, 1:3]
Out[57]:
array([[ 0.08869851,  0.46405472],
       [ 0.41541956,  0.81755275]])

Vectorized functions

In [58]:
A * 10
Out[58]:
array([[ 0.20889751,  2.84256561,  9.6299894 ,  0.85685384],
       [ 7.79093577,  0.88698511,  4.64054718,  0.93808819],
       [ 4.64852338,  4.15419561,  8.17552747,  8.70006654]])
In [59]:
A.sum()
Out[59]:
5.3473175620669089
In [60]:
A.sum(axis = 0)
Out[60]:
array([ 1.26483567,  0.78837463,  2.24460641,  1.04950086])
In [61]:
A.sum(axis = 1)
Out[61]:
array([ 1.35383064,  1.42565563,  2.5678313 ])
In [62]:
A.max(axis = 0)
Out[62]:
array([ 0.77909358,  0.41541956,  0.96299894,  0.87000665])

Input and output

We will mostly be using the pandas library to read in tabular data files, so this section is just for completeness.

In [63]:
%%file test1.csv
1,2,3
4,5,6
Overwriting test1.csv

The open function returns a file object that can be iterated over lazily, one line at a time, allowing us to loop through each line of potentially massive files without using much memory.

In [64]:
with open('test1.csv', 'r') as f:
    for line in f:
        print(line, end='')
1,2,3
4,5,6
In [65]:
s = ['to be', 'or not to be']
with open('test2.txt', 'w') as f:
    f.write('\n'.join(s))
In [66]:
with open('test2.txt', 'a') as f:
    f.write('\nthat is the question')
In [67]:
with open('test2.txt', 'r') as f:
    s = f.read()
print(s)
to be
or not to be
that is the question

Warning: Reading a whole file at once with read() or readlines() may use a large amount of memory. The line by line approach shown above is recommended.

In [68]:
with open('test2.txt', 'r') as f:
    s = f.readlines()
s
Out[68]:
['to be\n', 'or not to be\n', 'that is the question']

Getting comfortable with error messages

In [69]:
foo
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-69-d3b07384d113> in <module>()
----> 1 foo

NameError: name 'foo' is not defined
In [70]:
Sort([2,3,1])
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-70-dd11fece4740> in <module>()
----> 1 Sort([2,3,1])

NameError: name 'Sort' is not defined
In [71]:
for i in range(3):
print(i)
  File "<ipython-input-71-e9b0282dd71e>", line 2
    print(i)
        ^
IndentationError: expected an indented block

In [72]:
3 + '1'
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-72-08faeb277c1e> in <module>()
----> 1 3 + '1'

TypeError: unsupported operand type(s) for +: 'int' and 'str'
In [73]:
numbers = [1,2,3]
numbers[3]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-73-f377571de57c> in <module>()
      1 numbers = [1,2,3]
----> 2 numbers[3]

IndexError: list index out of range
In [74]:
contacts = {'bart': 'ann@fox.cartoons.org', 'bob': 'bob@pinapple.under.thesea'}
contacts['homer']
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-74-2aff8d06b7b7> in <module>()
      1 contacts = {'bart': 'ann@fox.cartoons.org', 'bob': 'bob@pinapple.under.thesea'}
----> 2 contacts['homer']

KeyError: 'homer'
In [75]:
x = 1 // 3
y = 3 // x
---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-75-95c864b76059> in <module>()
      1 x = 1 // 3
----> 2 y = 3 // x

ZeroDivisionError: integer division or modulo by zero
In [76]:
range(1,2,3,4)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-76-98baec868967> in <module>()
----> 1 range(1,2,3,4)

TypeError: range expected at most 3 arguments, got 4
In [77]:
open('spongebob.txt')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-77-defe1ddacd33> in <module>()
----> 1 open('spongebob.txt')

FileNotFoundError: [Errno 2] No such file or directory: 'spongebob.txt'

Exercises

1. Create a variable and save your name in it. Then print out "Hello <name>" where <name> comes from the saved variable.

In [1]:
name = 'Cliburn'
print('Hello {}'.format(name))
Hello Cliburn

2. Create a dictionary with day of week as key and number of letters as value. For example, one entry would have ‘monday’ as key and 6 as the value.

In [3]:
days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
# Map each day name to the number of letters it contains.
d = {day: len(day) for day in days}

3. Loop through the dictionary above, and print the day and number of letters for each entry on a separate line.

In [4]:
# Walk the (day, letter count) pairs of the dictionary, one per line.
for day, n_letters in d.items():
    print(day, n_letters)
wednesday 9
sunday 6
thursday 8
tuesday 7
friday 6
monday 6
saturday 8

4. Repeat 3, but now save the lines to a file called “days.txt”. Then read in the contents of the file and print to screen.

In [13]:
with open('days.txt', 'w') as f:
    for day in d:
        f.write('%s %s\n' % (day, d[day]))
In [14]:
with open('days.txt') as f:
    for line in f:
        print(line.strip())
wednesday 9
sunday 6
thursday 8
tuesday 7
friday 6
monday 6
saturday 8

5. Loop through the dictionary above, and print the day and number of letters for each entry on a separate line, but only for days with more than 6 letters.

In [15]:
# Only report the days whose names have more than 6 letters.
for day, n_letters in d.items():
    if n_letters > 6:
        print(day, n_letters)
wednesday 9
thursday 8
tuesday 7
saturday 8

6. Create a list with the values 1,1,2,3,5,8, and 13. Use indexing to

  • print the 3rd value
  • print the last 2 values
  • print only the odd values
In [16]:
xs = [1,1,2,3,5,8,13]
print(xs[2])
print(xs[-2:])
print([x for x in xs if x%2 == 1])
2
[8, 13]
[1, 1, 3, 5, 13]

7. Write a function that accepts a string argument and returns the number of characters in the string.

In [17]:
def strlen(s):
    """Count the characters in the string s.

    s -- the string to measure.
    Returns the number of characters as reported by ``len``.
    """
    n_chars = len(s)
    return n_chars
In [18]:
strlen("hello world")
Out[18]:
11

More challenging exercises

1a. Create a list of the integers from 1 to 9 using range. The solution is

[1, 2, 3, 4, 5, 6, 7, 8, 9]

1b. Create a list of the odd integers from 1 to 9. The solution is

[1, 3, 5, 7, 9]

1c. Create a list of the cubes of the odd integers from 1 to 9. The solution is

[1, 27, 125, 343, 729]
In [19]:
list(range(1, 10))
Out[19]:
[1, 2, 3, 4, 5, 6, 7, 8, 9]
In [21]:
[x for x in range(1, 10, 2)]
Out[21]:
[1, 3, 5, 7, 9]
In [22]:
[x**3 for x in range(1, 10, 2)]
Out[22]:
[1, 27, 125, 343, 729]

2. Strings have many useful methods. Here we will learn how to read the standard Python documentation for strings to understand what some of these string methods do. We will work with the following DNA sequence (gi|568815592:31575567-31578336 Homo sapiens chromosome 6, GRCh38.p7 Primary Assembly)

CAGACGCTCCCTCAGCAAGGACAGCAGAGGACCAGCTAAGAGGGAGAGAAGCAACTACAGACCCCCCCTG
AAAACAACCCTCAGACGCCACATCCCCTGACAAGCTGCCAGGCAGGTTCTCTTCCTCTCACATACTGACC
CACGGCTCCACCCTCTCTCCCCTGGAAAGGACACCATGAGCACTGAAAGCATGATCCGGGACGTGGAGCT
GGCCGAGGAGGCGCTCCCCAAGAAGACAGGGGGGCCCCAGGGCTCCAGGCGGTGCTTGTTCCTCAGCCTC
TTCTCCTTCCTGATCGTGGCAGGCGCCACCACGCTCTTCTGCCTGCTGCACTTTGGAGTGATCGGCCCCC
AGAGGGAAGAGGTGAGTGCCTGGCCAGCCTTCATCCACTCTCCCACCCAAGGGGAAATGGAGACGCAAGA
GAGGGAGAGAGATGGGATGGGTGAAAGATGTGCGCTGATAGGGAGGGATGGAGAGAAAAAAACGTGGAGA
AAGACGGGGATGCAGAAAGAGATGTGGCAAGAGATGGGGAAGAGAGAGAGAGAAAGATGGAGAGACAGGA
TGTCTGGCACATGGAAGGTGCTCACTAAGTGTGTATGGAGTGAATGAATGAATGAATGAATGAACAAGCA
GATATATAAATAAGATATGGAGACAGATGTGGGGTGTGAGAAGAGAGATGGGGGAAGAAACAAGTGATAT
GAATAAAGATGGTGAGACAGAAAGAGCGGGAAATATGACAGCTAAGGAGAGAGATGGGGGAGATAAGGAG
AGAAGAAGATAGGGTGTCTGGCACACAGAAGACACTCAGGGAAAGAGCTGTTGAATGCCTGGAAGGTGAA
TACACAGATGAATGGAGAGAGAAAACCAGACACCTCAGGGCTAAGAGCGCAGGCCAGACAGGCAGCCAGC
TGTTCCTCCTTTAAGGGTGACTCCCTCGATGTTAACCATTCTCCTTCTCCCCAACAGTTCCCCAGGGACC
TCTCTCTAATCAGCCCTCTGGCCCAGGCAGTCAGTAAGTGTCTCCAAACCTCTTTCCTAATTCTGGGTTT
GGGTTTGGGGGTAGGGTTAGTACCGGTATGGAAGCAGTGGGGGAAATTTAAAGTTTTGGTCTTGGGGGAG
GATGGATGGAGGTGAAAGTAGGGGGGTATTTTCTAGGAAGTTTAAGGGTCTCAGCTTTTTCTTTTCTCTC
TCCTCTTCAGGATCATCTTCTCGAACCCCGAGTGACAAGCCTGTAGCCCATGTTGTAGGTAAGAGCTCTG
AGGATGTGTCTTGGAACTTGGAGGGCTAGGATTTGGGGATTGAAGCCCGGCTGATGGTAGGCAGAACTTG
GAGACAATGTGAGAAGGACTCGCTGAGCTCAAGGGAAGGGTGGAGGAACAGCACAGGCCTTAGTGGGATA
CTCAGAACGTCATGGCCAGGTGGGATGTGGGATGACAGACAGAGAGGACAGGAACCGGATGTGGGGTGGG
CAGAGCTCGAGGGCCAGGATGTGGAGAGTGAACCGACATGGCCACACTGACTCTCCTCTCCCTCTCTCCC
TCCCTCCAGCAAACCCTCAAGCTGAGGGGCAGCTCCAGTGGCTGAACCGCCGGGCCAATGCCCTCCTGGC
CAATGGCGTGGAGCTGAGAGATAACCAGCTGGTGGTGCCATCAGAGGGCCTGTACCTCATCTACTCCCAG
GTCCTCTTCAAGGGCCAAGGCTGCCCCTCCACCCATGTGCTCCTCACCCACACCATCAGCCGCATCGCCG
TCTCCTACCAGACCAAGGTCAACCTCCTCTCTGCCATCAAGAGCCCCTGCCAGAGGGAGACCCCAGAGGG
GGCTGAGGCCAAGCCCTGGTATGAGCCCATCTATCTGGGAGGGGTCTTCCAGCTGGAGAAGGGTGACCGA
CTCAGCGCTGAGATCAATCGGCCCGACTATCTCGACTTTGCCGAGTCTGGGCAGGTCTACTTTGGGATCA
TTGCCCTGTGAGGAGGACGAACATCCAACCTTCCCAAACGCCTCCCCTGCCCCAATCCCTTTATTACCCC
CTCCTTCAGACACCCTCAACCTCTTCTGGCTCAAAAAGAGAATTGGGGGCTTAGGGTCGGAACCCAAGCT
TAGAACTTTAAGCAACAAGACCACCACTTCGAAACCTGGGATTCAGGAATGTGTGGCCTGCACAGTGAAG
TGCTGGCAACCACTAAGAATTCAAACTGGGGCCTCCAGAACTCACTGGGGCCTACAGCTTTGATCCCTGA
CATCTGGAATCTGGAGACCAGGGAGCCTTTGGTTCTGGCCAGAATGCTGCAGGACTTGAGAAGACCTCAC
CTAGAAATTGACACAAGTGGACCTTAGGCCTTCCTCTCTCCAGATGTTTCCAGACTTCCTTGAGACACGG
AGCCCAGCCCTCCCCATGGAGCCAGCTCCCTCTATTTATGTTTGCACTTGTGATTATTTATTATTTATTT
ATTATTTATTTATTTACAGATGAATGTATTTATTTGGGAGACCGGGGTATCCTGGGGGACCCAATGTAGG
AGCTGCCTTGGCTCAGACATGTTTTCCGTGAAAACGGAGCTGAACAATAGGCTGTTCCCATGTAGCCCCC
TGGCCTCTGTGCCTTCTTTTGATTATGTTTTTTAAAATATTTATCTGATTAAGTTGTCTAAACAATGCTG
ATTTGGTGACCAACTGTCACTCATTGCTGAGCCTCTGCTCCCCAGGGGAGTTGTGTCTGTAATCGCCCTA
CTATTCAGTGGCGAGAAATAAAGTTTGCTTAGAAAAGAAA

2a. Assign the sequence to a string variable called tnf. (Hint: This string spans multiple lines)

2b. Calculate the GC content

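$$\text{GC content} = \frac{G + C}{A + T + G + C}$$

where $A$, $T$, $G$ and $C$ are the numbers of times each nucleotide occurs in the sequence.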

2c. Find the RNA transcript using the mapping A->A, T->U, C->C, G->G.

In [25]:
tnf = '''CAGACGCTCCCTCAGCAAGGACAGCAGAGGACCAGCTAAGAGGGAGAGAAGCAACTACAGACCCCCCCTG
AAAACAACCCTCAGACGCCACATCCCCTGACAAGCTGCCAGGCAGGTTCTCTTCCTCTCACATACTGACC
CACGGCTCCACCCTCTCTCCCCTGGAAAGGACACCATGAGCACTGAAAGCATGATCCGGGACGTGGAGCT
GGCCGAGGAGGCGCTCCCCAAGAAGACAGGGGGGCCCCAGGGCTCCAGGCGGTGCTTGTTCCTCAGCCTC
TTCTCCTTCCTGATCGTGGCAGGCGCCACCACGCTCTTCTGCCTGCTGCACTTTGGAGTGATCGGCCCCC
AGAGGGAAGAGGTGAGTGCCTGGCCAGCCTTCATCCACTCTCCCACCCAAGGGGAAATGGAGACGCAAGA
GAGGGAGAGAGATGGGATGGGTGAAAGATGTGCGCTGATAGGGAGGGATGGAGAGAAAAAAACGTGGAGA
AAGACGGGGATGCAGAAAGAGATGTGGCAAGAGATGGGGAAGAGAGAGAGAGAAAGATGGAGAGACAGGA
TGTCTGGCACATGGAAGGTGCTCACTAAGTGTGTATGGAGTGAATGAATGAATGAATGAATGAACAAGCA
GATATATAAATAAGATATGGAGACAGATGTGGGGTGTGAGAAGAGAGATGGGGGAAGAAACAAGTGATAT
GAATAAAGATGGTGAGACAGAAAGAGCGGGAAATATGACAGCTAAGGAGAGAGATGGGGGAGATAAGGAG
AGAAGAAGATAGGGTGTCTGGCACACAGAAGACACTCAGGGAAAGAGCTGTTGAATGCCTGGAAGGTGAA
TACACAGATGAATGGAGAGAGAAAACCAGACACCTCAGGGCTAAGAGCGCAGGCCAGACAGGCAGCCAGC
TGTTCCTCCTTTAAGGGTGACTCCCTCGATGTTAACCATTCTCCTTCTCCCCAACAGTTCCCCAGGGACC
TCTCTCTAATCAGCCCTCTGGCCCAGGCAGTCAGTAAGTGTCTCCAAACCTCTTTCCTAATTCTGGGTTT
GGGTTTGGGGGTAGGGTTAGTACCGGTATGGAAGCAGTGGGGGAAATTTAAAGTTTTGGTCTTGGGGGAG
GATGGATGGAGGTGAAAGTAGGGGGGTATTTTCTAGGAAGTTTAAGGGTCTCAGCTTTTTCTTTTCTCTC
TCCTCTTCAGGATCATCTTCTCGAACCCCGAGTGACAAGCCTGTAGCCCATGTTGTAGGTAAGAGCTCTG
AGGATGTGTCTTGGAACTTGGAGGGCTAGGATTTGGGGATTGAAGCCCGGCTGATGGTAGGCAGAACTTG
GAGACAATGTGAGAAGGACTCGCTGAGCTCAAGGGAAGGGTGGAGGAACAGCACAGGCCTTAGTGGGATA
CTCAGAACGTCATGGCCAGGTGGGATGTGGGATGACAGACAGAGAGGACAGGAACCGGATGTGGGGTGGG
CAGAGCTCGAGGGCCAGGATGTGGAGAGTGAACCGACATGGCCACACTGACTCTCCTCTCCCTCTCTCCC
TCCCTCCAGCAAACCCTCAAGCTGAGGGGCAGCTCCAGTGGCTGAACCGCCGGGCCAATGCCCTCCTGGC
CAATGGCGTGGAGCTGAGAGATAACCAGCTGGTGGTGCCATCAGAGGGCCTGTACCTCATCTACTCCCAG
GTCCTCTTCAAGGGCCAAGGCTGCCCCTCCACCCATGTGCTCCTCACCCACACCATCAGCCGCATCGCCG
TCTCCTACCAGACCAAGGTCAACCTCCTCTCTGCCATCAAGAGCCCCTGCCAGAGGGAGACCCCAGAGGG
GGCTGAGGCCAAGCCCTGGTATGAGCCCATCTATCTGGGAGGGGTCTTCCAGCTGGAGAAGGGTGACCGA
CTCAGCGCTGAGATCAATCGGCCCGACTATCTCGACTTTGCCGAGTCTGGGCAGGTCTACTTTGGGATCA
TTGCCCTGTGAGGAGGACGAACATCCAACCTTCCCAAACGCCTCCCCTGCCCCAATCCCTTTATTACCCC
CTCCTTCAGACACCCTCAACCTCTTCTGGCTCAAAAAGAGAATTGGGGGCTTAGGGTCGGAACCCAAGCT
TAGAACTTTAAGCAACAAGACCACCACTTCGAAACCTGGGATTCAGGAATGTGTGGCCTGCACAGTGAAG
TGCTGGCAACCACTAAGAATTCAAACTGGGGCCTCCAGAACTCACTGGGGCCTACAGCTTTGATCCCTGA
CATCTGGAATCTGGAGACCAGGGAGCCTTTGGTTCTGGCCAGAATGCTGCAGGACTTGAGAAGACCTCAC
CTAGAAATTGACACAAGTGGACCTTAGGCCTTCCTCTCTCCAGATGTTTCCAGACTTCCTTGAGACACGG
AGCCCAGCCCTCCCCATGGAGCCAGCTCCCTCTATTTATGTTTGCACTTGTGATTATTTATTATTTATTT
ATTATTTATTTATTTACAGATGAATGTATTTATTTGGGAGACCGGGGTATCCTGGGGGACCCAATGTAGG
AGCTGCCTTGGCTCAGACATGTTTTCCGTGAAAACGGAGCTGAACAATAGGCTGTTCCCATGTAGCCCCC
TGGCCTCTGTGCCTTCTTTTGATTATGTTTTTTAAAATATTTATCTGATTAAGTTGTCTAAACAATGCTG
ATTTGGTGACCAACTGTCACTCATTGCTGAGCCTCTGCTCCCCAGGGGAGTTGTGTCTGTAATCGCCCTA
CTATTCAGTGGCGAGAAATAAAGTTTGCTTAGAAAAGAAA'''
In [26]:
ng = tnf.count('G')
nc = tnf.count('C')
na = tnf.count('A')
nt = tnf.count('T')
(nc + ng)/(na + nt + nc + ng)
Out[26]:
0.5281588447653429
In [28]:
print(tnf.replace('T', 'U'))
CAGACGCUCCCUCAGCAAGGACAGCAGAGGACCAGCUAAGAGGGAGAGAAGCAACUACAGACCCCCCCUG
AAAACAACCCUCAGACGCCACAUCCCCUGACAAGCUGCCAGGCAGGUUCUCUUCCUCUCACAUACUGACC
CACGGCUCCACCCUCUCUCCCCUGGAAAGGACACCAUGAGCACUGAAAGCAUGAUCCGGGACGUGGAGCU
GGCCGAGGAGGCGCUCCCCAAGAAGACAGGGGGGCCCCAGGGCUCCAGGCGGUGCUUGUUCCUCAGCCUC
UUCUCCUUCCUGAUCGUGGCAGGCGCCACCACGCUCUUCUGCCUGCUGCACUUUGGAGUGAUCGGCCCCC
AGAGGGAAGAGGUGAGUGCCUGGCCAGCCUUCAUCCACUCUCCCACCCAAGGGGAAAUGGAGACGCAAGA
GAGGGAGAGAGAUGGGAUGGGUGAAAGAUGUGCGCUGAUAGGGAGGGAUGGAGAGAAAAAAACGUGGAGA
AAGACGGGGAUGCAGAAAGAGAUGUGGCAAGAGAUGGGGAAGAGAGAGAGAGAAAGAUGGAGAGACAGGA
UGUCUGGCACAUGGAAGGUGCUCACUAAGUGUGUAUGGAGUGAAUGAAUGAAUGAAUGAAUGAACAAGCA
GAUAUAUAAAUAAGAUAUGGAGACAGAUGUGGGGUGUGAGAAGAGAGAUGGGGGAAGAAACAAGUGAUAU
GAAUAAAGAUGGUGAGACAGAAAGAGCGGGAAAUAUGACAGCUAAGGAGAGAGAUGGGGGAGAUAAGGAG
AGAAGAAGAUAGGGUGUCUGGCACACAGAAGACACUCAGGGAAAGAGCUGUUGAAUGCCUGGAAGGUGAA
UACACAGAUGAAUGGAGAGAGAAAACCAGACACCUCAGGGCUAAGAGCGCAGGCCAGACAGGCAGCCAGC
UGUUCCUCCUUUAAGGGUGACUCCCUCGAUGUUAACCAUUCUCCUUCUCCCCAACAGUUCCCCAGGGACC
UCUCUCUAAUCAGCCCUCUGGCCCAGGCAGUCAGUAAGUGUCUCCAAACCUCUUUCCUAAUUCUGGGUUU
GGGUUUGGGGGUAGGGUUAGUACCGGUAUGGAAGCAGUGGGGGAAAUUUAAAGUUUUGGUCUUGGGGGAG
GAUGGAUGGAGGUGAAAGUAGGGGGGUAUUUUCUAGGAAGUUUAAGGGUCUCAGCUUUUUCUUUUCUCUC
UCCUCUUCAGGAUCAUCUUCUCGAACCCCGAGUGACAAGCCUGUAGCCCAUGUUGUAGGUAAGAGCUCUG
AGGAUGUGUCUUGGAACUUGGAGGGCUAGGAUUUGGGGAUUGAAGCCCGGCUGAUGGUAGGCAGAACUUG
GAGACAAUGUGAGAAGGACUCGCUGAGCUCAAGGGAAGGGUGGAGGAACAGCACAGGCCUUAGUGGGAUA
CUCAGAACGUCAUGGCCAGGUGGGAUGUGGGAUGACAGACAGAGAGGACAGGAACCGGAUGUGGGGUGGG
CAGAGCUCGAGGGCCAGGAUGUGGAGAGUGAACCGACAUGGCCACACUGACUCUCCUCUCCCUCUCUCCC
UCCCUCCAGCAAACCCUCAAGCUGAGGGGCAGCUCCAGUGGCUGAACCGCCGGGCCAAUGCCCUCCUGGC
CAAUGGCGUGGAGCUGAGAGAUAACCAGCUGGUGGUGCCAUCAGAGGGCCUGUACCUCAUCUACUCCCAG
GUCCUCUUCAAGGGCCAAGGCUGCCCCUCCACCCAUGUGCUCCUCACCCACACCAUCAGCCGCAUCGCCG
UCUCCUACCAGACCAAGGUCAACCUCCUCUCUGCCAUCAAGAGCCCCUGCCAGAGGGAGACCCCAGAGGG
GGCUGAGGCCAAGCCCUGGUAUGAGCCCAUCUAUCUGGGAGGGGUCUUCCAGCUGGAGAAGGGUGACCGA
CUCAGCGCUGAGAUCAAUCGGCCCGACUAUCUCGACUUUGCCGAGUCUGGGCAGGUCUACUUUGGGAUCA
UUGCCCUGUGAGGAGGACGAACAUCCAACCUUCCCAAACGCCUCCCCUGCCCCAAUCCCUUUAUUACCCC
CUCCUUCAGACACCCUCAACCUCUUCUGGCUCAAAAAGAGAAUUGGGGGCUUAGGGUCGGAACCCAAGCU
UAGAACUUUAAGCAACAAGACCACCACUUCGAAACCUGGGAUUCAGGAAUGUGUGGCCUGCACAGUGAAG
UGCUGGCAACCACUAAGAAUUCAAACUGGGGCCUCCAGAACUCACUGGGGCCUACAGCUUUGAUCCCUGA
CAUCUGGAAUCUGGAGACCAGGGAGCCUUUGGUUCUGGCCAGAAUGCUGCAGGACUUGAGAAGACCUCAC
CUAGAAAUUGACACAAGUGGACCUUAGGCCUUCCUCUCUCCAGAUGUUUCCAGACUUCCUUGAGACACGG
AGCCCAGCCCUCCCCAUGGAGCCAGCUCCCUCUAUUUAUGUUUGCACUUGUGAUUAUUUAUUAUUUAUUU
AUUAUUUAUUUAUUUACAGAUGAAUGUAUUUAUUUGGGAGACCGGGGUAUCCUGGGGGACCCAAUGUAGG
AGCUGCCUUGGCUCAGACAUGUUUUCCGUGAAAACGGAGCUGAACAAUAGGCUGUUCCCAUGUAGCCCCC
UGGCCUCUGUGCCUUCUUUUGAUUAUGUUUUUUAAAAUAUUUAUCUGAUUAAGUUGUCUAAACAAUGCUG
AUUUGGUGACCAACUGUCACUCAUUGCUGAGCCUCUGCUCCCCAGGGGAGUUGUGUCUGUAAUCGCCCUA
CUAUUCAGUGGCGAGAAAUAAAGUUUGCUUAGAAAAGAAA

Alternative version: The translate method can make multiple substitutions at the same time. Note that the mapping used here also converts the sequence to lower case.

In [33]:
print(tnf.translate(tnf.maketrans('ACTG', 'acug')))
cagacgcucccucagcaaggacagcagaggaccagcuaagagggagagaagcaacuacagacccccccug
aaaacaacccucagacgccacauccccugacaagcugccaggcagguucucuuccucucacauacugacc
cacggcuccacccucucuccccuggaaaggacaccaugagcacugaaagcaugauccgggacguggagcu
ggccgaggaggcgcuccccaagaagacaggggggccccagggcuccaggcggugcuuguuccucagccuc
uucuccuuccugaucguggcaggcgccaccacgcucuucugccugcugcacuuuggagugaucggccccc
agagggaagaggugagugccuggccagccuucauccacucucccacccaaggggaaauggagacgcaaga
gagggagagagaugggaugggugaaagaugugcgcugauagggagggauggagagaaaaaaacguggaga
aagacggggaugcagaaagagauguggcaagagauggggaagagagagagagaaagauggagagacagga
ugucuggcacauggaaggugcucacuaaguguguauggagugaaugaaugaaugaaugaaugaacaagca
gauauauaaauaagauauggagacagauguggggugugagaagagagaugggggaagaaacaagugauau
gaauaaagauggugagacagaaagagcgggaaauaugacagcuaaggagagagaugggggagauaaggag
agaagaagauagggugucuggcacacagaagacacucagggaaagagcuguugaaugccuggaaggugaa
uacacagaugaauggagagagaaaaccagacaccucagggcuaagagcgcaggccagacaggcagccagc
uguuccuccuuuaagggugacucccucgauguuaaccauucuccuucuccccaacaguuccccagggacc
ucucucuaaucagcccucuggcccaggcagucaguaagugucuccaaaccucuuuccuaauucuggguuu
ggguuuggggguaggguuaguaccgguauggaagcagugggggaaauuuaaaguuuuggucuugggggag
gauggauggaggugaaaguagggggguauuuucuaggaaguuuaagggucucagcuuuuucuuuucucuc
uccucuucaggaucaucuucucgaaccccgagugacaagccuguagcccauguuguagguaagagcucug
aggaugugucuuggaacuuggagggcuaggauuuggggauugaagcccggcugaugguaggcagaacuug
gagacaaugugagaaggacucgcugagcucaagggaaggguggaggaacagcacaggccuuagugggaua
cucagaacgucauggccaggugggaugugggaugacagacagagaggacaggaaccggaugugggguggg
cagagcucgagggccaggauguggagagugaaccgacauggccacacugacucuccucucccucucuccc
ucccuccagcaaacccucaagcugaggggcagcuccaguggcugaaccgccgggccaaugcccuccuggc
caauggcguggagcugagagauaaccagcugguggugccaucagagggccuguaccucaucuacucccag
guccucuucaagggccaaggcugccccuccacccaugugcuccucacccacaccaucagccgcaucgccg
ucuccuaccagaccaaggucaaccuccucucugccaucaagagccccugccagagggagaccccagaggg
ggcugaggccaagcccugguaugagcccaucuaucugggaggggucuuccagcuggagaagggugaccga
cucagcgcugagaucaaucggcccgacuaucucgacuuugccgagucugggcaggucuacuuugggauca
uugcccugugaggaggacgaacauccaaccuucccaaacgccuccccugccccaaucccuuuauuacccc
cuccuucagacacccucaaccucuucuggcucaaaaagagaauugggggcuuagggucggaacccaagcu
uagaacuuuaagcaacaagaccaccacuucgaaaccugggauucaggaauguguggccugcacagugaag
ugcuggcaaccacuaagaauucaaacuggggccuccagaacucacuggggccuacagcuuugaucccuga
caucuggaaucuggagaccagggagccuuugguucuggccagaaugcugcaggacuugagaagaccucac
cuagaaauugacacaaguggaccuuaggccuuccucucuccagauguuuccagacuuccuugagacacgg
agcccagcccuccccauggagccagcucccucuauuuauguuugcacuugugauuauuuauuauuuauuu
auuauuuauuuauuuacagaugaauguauuuauuugggagaccgggguauccugggggacccaauguagg
agcugccuuggcucagacauguuuuccgugaaaacggagcugaacaauaggcuguucccauguagccccc
uggccucugugccuucuuuugauuauguuuuuuaaaauauuuaucugauuaaguugucuaaacaaugcug
auuuggugaccaacugucacucauugcugagccucugcuccccaggggaguugugucuguaaucgcccua
cuauucaguggcgagaaauaaaguuugcuuagaaaagaaa

3. You have 5 kids in your household, whose behavior has been

  • Ann, Good
  • Bob, Bad
  • Charlie, Good
  • David, Good
  • Ella, Bad

On Christmas Eve, Santa will give good kids an iPhone 7 and bad kids a lump of coal.

3a. Store each kid's name and behavior in a dictionary called santa_dict.

3b. On Christmas Eve Eve, David threw a tantrum and kicked his sister Ann. Change the dictionary entry for David to Bad.

3c. Write a loop that prints the name of each child, followed by ‘Coal’ or ‘iPhone’. The output should be

David Coal
Ann iPhone
Ella Coal
Charlie iPhone
Bob Coal
In [34]:
santa_dict = {
    'Ann': 'Good',
    'Bob': 'Bad',
    'Charlie': 'Good',
    'David': 'Good',
    'Ella': 'Bad'
    }
In [35]:
santa_dict['David'] = 'Bad'
In [38]:
for kid, behavior in santa_dict.items():
    if behavior == 'Good':
        print(kid, 'iPhone')
    else:
        print(kid, 'Coal')
David Coal
Bob Coal
Ann iPhone
Charlie iPhone
Ella Coal

4a. Write a function (call the function collatz) of a positive integer that returns the following result

  • If the number is even, divide it by two
  • If the number is odd, triple it and add one
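In other words,

$$\text{collatz}(n) = \begin{cases} n / 2 & \text{if } n \text{ is even} \\ 3n + 1 & \text{if } n \text{ is odd} \end{cases}$$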

4b. Write a loop that repeatedly calls n = collatz(n) given some start value n while n is not equal to 1. At each iteration in the loop, print the current value of n.

4c. Write a function collatz_sequence that takes a positive integer argument n and returns the list of numbers generated by the while loop from 4b starting with the given value of n. For example, collatz_sequence(6) should give the following output:

[6, 3, 10, 5, 16, 8, 4, 2, 1]
In [39]:
def collatz(n):
    """Apply one step of the hailstone rule used to build Collatz sequences.

    An even n is halved (integer division); an odd n is tripled and
    incremented by one.
    """
    is_odd = n % 2 != 0
    if is_odd:
        return 3 * n + 1
    return n // 2
In [41]:
n = 6
while n != 1:
    print(n, end=', ')
    n = collatz(n)
6, 3, 10, 5, 16, 8, 4, 2,
In [42]:
def collatz_sequence(n):
    """Return the Collatz sequence beginning with n.

    n -- the positive integer at which the sequence starts.
    Returns a list that starts with n and holds each successive value
    produced by collatz, up to and including the first 1.
    Raises ValueError if n is less than 1.
    """
    # Integers below 1 never reach 1 (0 maps to 0 forever and negative
    # values stay negative), so without this check the loop would never end.
    if n < 1:
        raise ValueError('n must be a positive integer, got {!r}'.format(n))
    ns = [n]
    while n != 1:
        n = collatz(n)
        ns.append(n)
    return ns
In [43]:
collatz_sequence(6)
Out[43]:
[6, 3, 10, 5, 16, 8, 4, 2, 1]