Using numpy

[1]:
import numpy as np

NDArray

  • shape

  • dtype

[2]:
# Build a 3x4 integer array holding 0..11, filled row by row.
x = np.reshape(np.arange(12), (3, 4))
[3]:
# Display the array: the integers 0..11 laid out as 3 rows of 4.
x
[3]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
[4]:
# shape is a tuple with the length of each axis: (rows, columns).
x.shape
[4]:
(3, 4)
[5]:
# dtype is the single element type shared by every entry (int64 in the recorded output).
x.dtype
[5]:
dtype('int64')

Indexing and slices

  • Views and copies

[6]:
# A single integer index selects along axis 0, i.e. the first row.
x[0]
[6]:
array([0, 1, 2, 3])
[7]:
# Same row as x[0], with the "all columns" slice written out explicitly.
x[0, :]
[7]:
array([0, 1, 2, 3])
[8]:
# Every row, columns 1 and 2 (the stop index 3 is excluded).
x[:, 1:3]
[8]:
array([[ 1,  2],
       [ 5,  6],
       [ 9, 10]])
[9]:
# Slicing returns a view, not a copy: y shares x's data, so a write
# through y also changes x (demonstrated in the following cells).
y = x[:]
[10]:
# Before any modification, y shows the same contents as x.
y
[10]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
[11]:
# Overwrite row 1 through the view. np.ones produces floats; they are cast
# to y's integer dtype on assignment, so the row shows as integer 1s.
y[1] = np.ones(4)
[12]:
# Row 1 of y is now all ones.
y
[12]:
array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])
[13]:
# x shows the same change even though only y was assigned to:
# y is a view onto x's data, not an independent copy.
x
[13]:
array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

Matrix multiplication

  • Row vectors, column vectors and 1d arrays

  • Changing shape - reshape, newaxis, ravel, squeeze, keepdims

[14]:
# A 1-D array has a single axis of length 5; it is neither a row nor a column vector.
x1 = np.arange(5)
x1.shape
[14]:
(5,)
[15]:
# Reshape into an explicit 2-D column vector; -1 asks NumPy to infer that
# axis length from the element count (5 here).
x2 = x1.reshape(-1,1)
x2.shape
[15]:
(5, 1)
[16]:
# Transposing a 1-D array does nothing, so this is x1 @ x1: the inner
# product, which evaluates to a scalar rather than a matrix.
x1 @ x1.T
[16]:
30
[17]:
# With explicit 2-D shapes, (5, 1) @ (1, 5) is the outer product: a 5x5 matrix.
x2 @ x2.T
[17]:
array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12],
       [ 0,  4,  8, 12, 16]])

Conditional replacement with where

[18]:
# Current contents of x (row 1 was overwritten with ones earlier in the notebook).
x
[18]:
array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])
[19]:
# Element-wise selection: 0 where the entry is even, 1 where it is odd.
np.where(x % 2 == 0, 0, 1)
[19]:
array([[0, 1, 0, 1],
       [1, 1, 1, 1],
       [0, 1, 0, 1]])

Array creation functions

[20]:
# 2x3 array of zeros; the shape is passed as a tuple and the entries are floats.
np.zeros((2,3))
[20]:
array([[0., 0., 0.],
       [0., 0., 0.]])
[21]:
# 2x3 array of ones; the shape is passed as a tuple and the entries are floats.
np.ones((2,3))
[21]:
array([[1., 1., 1.],
       [1., 1., 1.]])
[22]:
# Build a 2x3 array from its indices: entry (i, j) is i*3 + j.
# The recorded output is floating point.
np.fromfunction(lambda i, j: i*3+j, (2, 3))
[22]:
array([[0., 1., 2.],
       [3., 4., 5.]])

Reductions (margins)

[23]:
# Show x again as the input for the reductions below.
x
[23]:
array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])
[24]:
# With no axis argument, sum reduces over every element and returns a scalar.
x.sum()
[24]:
48
[25]:
# axis=0 collapses the rows, leaving one total per column (4 values).
x.sum(axis=0)
[25]:
array([ 9, 11, 13, 15])
[26]:
# axis=1 collapses the columns, leaving one total per row (3 values).
x.sum(axis=1)
[26]:
array([ 6,  4, 38])

Broadcasting

[27]:
# Shape of the left operand used in the broadcasting examples.
x.shape
[27]:
(3, 4)
[28]:
# Column totals have shape (4,), which matches the last axis of x.
x.sum(axis=0).shape
[28]:
(4,)
[29]:
# (3, 4) / (4,): the column totals are broadcast across the rows,
# so each column of the result sums to 1.
x / x.sum(axis=0)
[29]:
array([[0.        , 0.09090909, 0.15384615, 0.2       ],
       [0.11111111, 0.09090909, 0.07692308, 0.06666667],
       [0.88888889, 0.81818182, 0.76923077, 0.73333333]])
[30]:
# Row totals have shape (3,); that does not line up with the last axis of
# x (length 4), so it needs an extra axis before it can divide x.
x.sum(axis=1).shape
[30]:
(3,)
[31]:
# keepdims=True keeps the reduced axis with length 1, giving shape (3, 1).
x.sum(axis=1, keepdims=True).shape
[31]:
(3, 1)
[32]:
# (3, 4) / (3, 1): the row totals are broadcast across the columns,
# so each row of the result sums to 1.
x / x.sum(axis=1, keepdims=True)
[32]:
array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])
[33]:
# Indexing with None inserts a length-1 axis, turning (3,) into (3, 1);
# the result is the same as with keepdims=True.
x / x.sum(axis=1)[:, None]
[33]:
array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])
[34]:
# np.newaxis is a more readable spelling of the same indexing; the output
# matches the None version.
x / x.sum(axis=1)[:, np.newaxis]
[34]:
array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])

Universal functions (ufunc)

[35]:
# A ufunc is applied element by element; the integer input gives a float result.
np.sqrt(x)
[35]:
array([[0.        , 1.        , 1.41421356, 1.73205081],
       [1.        , 1.        , 1.        , 1.        ],
       [2.82842712, 3.        , 3.16227766, 3.31662479]])

Einstein summation notation

[36]:
# Two small integer matrices with the same number of columns:
# a is 2x2 and b is 3x2.
a = np.array([[1,2], [3,4]])
b = np.array([[3,4], [5,6], [7,8]])
[37]:
# Display a (2 rows, 2 columns).
a
[37]:
array([[1, 2],
       [3, 4]])
[38]:
# Display b (3 rows, 2 columns).
b
[38]:
array([[3, 4],
       [5, 6],
       [7, 8]])
[39]:
# Reference implementation with explicit loops: entry (row, col) is the dot
# product of row `row` of a with row `col` of b. np.zeros allocates a float
# array, so the result is displayed with floating-point entries.
m = np.zeros((len(a), len(b)))
for row, a_row in enumerate(a):
    for col, b_row in enumerate(b):
        m[row, col] = a_row @ b_row
m
[39]:
array([[11., 17., 23.],
       [25., 39., 53.]])
[40]:
# One-call equivalent of the explicit double loop: for each (i, j), sum
# a[i, n] * b[j, n] over the shared index n. The result keeps the integer
# dtype of the inputs.
np.einsum('in,jn -> ij', a, b)
[40]:
array([[11, 17, 23],
       [25, 39, 53]])

Random module (cf. SciPy)

[41]:
# 2x3 array of draws from a Poisson distribution with rate 3.
# NOTE(review): no seed is set in the visible cells, so the recorded output
# will not be reproduced on re-run.
np.random.poisson(3, (2,3))
[41]:
array([[2, 4, 2],
       [4, 5, 2]])
[42]:
# 2x3 array of draws from a normal distribution with mean 0 and standard
# deviation 1. Unseeded, so values change from run to run.
np.random.normal(0, 1, (2,3))
[42]:
array([[ 0.30178899, -0.2008874 ,  0.9557239 ],
       [ 0.35686771,  0.56554302, -0.42485798]])
[43]:
# Random ordering of the integers 0..9. Unseeded, so the order changes from run to run.
np.random.permutation(10)
[43]:
array([2, 8, 9, 1, 3, 7, 4, 5, 6, 0])
[44]:
# Fill a 4x5 array by sampling from 'a', 'b', 'c'; values repeat, so the
# draw is with replacement. Unseeded, so the result changes from run to run.
np.random.choice(list('abc'), (4,5))
[44]:
array([['b', 'b', 'a', 'c', 'a'],
       ['b', 'a', 'b', 'c', 'c'],
       ['c', 'c', 'c', 'b', 'a'],
       ['c', 'b', 'c', 'a', 'b']], dtype='<U1')

Linear algebra submodule (cf. SciPy)

[45]:
# Show x again as the input for the linear-algebra calls below.
x
[45]:
array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])
[46]:
# Singular value decomposition of the 3x4 matrix. Three arrays come back:
# a 3x3 matrix, the 3 singular values, and a 4x4 matrix. The last singular
# value in the recorded output is ~1e-16, i.e. x is numerically rank 2.
np.linalg.svd(x)
[46]:
(array([[-0.16753774, -0.97815111, -0.12309149],
        [-0.10163439,  0.14132757, -0.98473193],
        [-0.98061285,  0.15246943,  0.12309149]]),
 array([1.95072080e+01, 1.86248100e+00, 3.47460824e-16]),
 array([[-0.40736415, -0.46622191, -0.52507967, -0.58393743],
        [ 0.73079029,  0.28746675, -0.15585679, -0.59918034],
        [-0.10532429, -0.26019684,  0.83636654, -0.47084542],
        [-0.53750051,  0.79517143,  0.02215866, -0.27982958]]))
[47]:
# Least-squares solve of x @ coef = [0, 1, 2]. The returned tuple is
# (solution, residuals, rank, singular values); the recorded output reports
# rank 2 and an empty residuals array.
np.linalg.lstsq(x, np.arange(3), rcond=None)
[47]:
(array([ 0.21818182,  0.11818182,  0.01818182, -0.08181818]),
 array([], dtype=float64),
 2,
 array([1.95072080e+01, 1.86248100e+00, 2.73318015e-16]))

Masked array

[48]:
# 4x5 array of the integers 0..19.
# Note: this rebinds the name `a`, which the einsum example used for a 2x2 matrix.
a = np.arange(20).reshape((4,5))
a
[48]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])
[49]:
# Boolean array that is True where the entry of a is even.
mask = np.ma.make_mask(a % 2 == 0)
mask
[49]:
array([[ True, False,  True, False,  True],
       [False,  True, False,  True, False],
       [ True, False,  True, False,  True],
       [False,  True, False,  True, False]])
[50]:
# Replace the odd entries with NaN. The result is a float array, because
# NaN cannot be stored in an integer array. This rebinds `a`; the earlier
# integer version is no longer reachable under that name.
a = np.where(a % 2 != 0, np.nan, a)
[51]:
# Even entries are kept (now as floats); odd entries show as nan.
a
[51]:
array([[ 0., nan,  2., nan,  4.],
       [nan,  6., nan,  8., nan],
       [10., nan, 12., nan, 14.],
       [nan, 16., nan, 18., nan]])
[52]:
# NaN propagates through a plain sum, so the total is nan.
np.sum(a)
[52]:
nan
[53]:
# Boolean indexing keeps only the entries where mask is True (the even,
# non-NaN values), so the sum is finite.
np.sum(a[mask])
[53]:
90.0

Memory mapping

When you are working with arrays that are too large to fit in memory, you can use np.memmap to map an array stored in a file on disk and read or write parts of it without loading the whole array.

[54]:
# Open foo.dat (relative to the current working directory) in 'w+' mode as
# a file-backed 10x10 float64 array.
fp = np.memmap('foo.dat', dtype=np.float64, mode='w+', shape=(10,10))
[55]:
# Slice assignment writes the values 0..99 into the mapped array itself
# rather than rebinding the name fp to a new in-memory array.
fp[:] = np.arange(100).reshape((10,10))
[56]:
# The memmap displays like an ordinary array, now holding 0..99 as floats.
fp
[56]:
memmap([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14., 15., 16., 17., 18., 19.],
        [20., 21., 22., 23., 24., 25., 26., 27., 28., 29.],
        [30., 31., 32., 33., 34., 35., 36., 37., 38., 39.],
        [40., 41., 42., 43., 44., 45., 46., 47., 48., 49.],
        [50., 51., 52., 53., 54., 55., 56., 57., 58., 59.],
        [60., 61., 62., 63., 64., 65., 66., 67., 68., 69.],
        [70., 71., 72., 73., 74., 75., 76., 77., 78., 79.],
        [80., 81., 82., 83., 84., 85., 86., 87., 88., 89.],
        [90., 91., 92., 93., 94., 95., 96., 97., 98., 99.]])
[57]:
# Write pending changes to foo.dat explicitly before dropping the reference.
# `del` on its own only removes the name and leaves the flush to whenever the
# object is garbage collected; the following cells re-open the file and rely
# on the data already being on disk.
fp.flush()
del fp
[58]:
# Re-open the existing file as a 10x10 float64 array. The dtype and shape
# must be given again here, matching the ones used when writing.
# NOTE(review): no mode is passed, so the default applies; NumPy's default
# is read/write ('r+') - consider mode='r' since the data is only read here.
fp1 = np.memmap('foo.dat', dtype=np.float64, shape=(10,10))
[59]:
# Top-left 5x5 corner of the re-opened array; the values written earlier
# are read back from the file.
fp1[:5, :5]
[59]:
memmap([[ 0.,  1.,  2.,  3.,  4.],
        [10., 11., 12., 13., 14.],
        [20., 21., 22., 23., 24.],
        [30., 31., 32., 33., 34.],
        [40., 41., 42., 43., 44.]])
[60]:
# Map only the tail of the file: skip the first 75 stored values and view
# the remaining 25 as a 5x5 array. `offset` is given in bytes, so the element
# count is multiplied by the size of one float64 (8 bytes).
fp2 = np.memmap(
    'foo.dat',
    dtype=np.float64,
    offset=75 * np.dtype(np.float64).itemsize,
    shape=(5, 5),
)
[61]:
# The mapped window starts at the value 75, which confirms the byte offset.
fp2
[61]:
memmap([[75., 76., 77., 78., 79.],
        [80., 81., 82., 83., 84.],
        [85., 86., 87., 88., 89.],
        [90., 91., 92., 93., 94.],
        [95., 96., 97., 98., 99.]])
[ ]: