Using numpy¶

[1]:

import numpy as np

NDArray¶

shape
dtype

[2]:

# Build a 2-D integer array: the values 0..11 laid out row by row in 3 rows x 4 columns.
x = np.arange(12).reshape(3,4)

[3]:

[3]:

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

[4]:

# shape is a tuple with one length per axis: (rows, columns).
x.shape

[4]:

(3, 4)

[5]:

# dtype is the element type shared by every entry; the recorded output shows int64.
x.dtype

[5]:

dtype('int64')

Indexing and slices¶

Views and copies

[6]:

# A single integer index selects along the first axis, i.e. the first row.
x[0]

[6]:

array([0, 1, 2, 3])

[7]:

# Same row as x[0]; the explicit ':' spells out "all columns".
x[0, :]

[7]:

array([0, 1, 2, 3])

[8]:

# All rows, columns 1 and 2 (the stop index 3 is excluded).
x[:, 1:3]

[8]:

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10]])

[9]:

# Basic slicing returns a view, not a copy: y shares its data buffer with x.
# Use x.copy() when an independent array is needed.
y = x[:]

[10]:

[10]:

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

[11]:

# Assign through the view; np.ones(4) is float, so the values are cast to y's integer dtype.
# Because y is a view of x, this also overwrites row 1 of x.
y[1] = np.ones(4)

[12]:

[12]:

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

[13]:

[13]:

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

Matrix multiplication¶

Row vectors, column vectors and 1d arrays
Changing shape - reshape, newaxis, ravel, squeeze, keepdims

[14]:

# A 1-D array has a single axis: it is neither a row vector nor a column vector.
x1 = np.arange(5)
x1.shape

[14]:

(5,)

[15]:

# reshape(-1, 1) makes an explicit column vector; -1 lets numpy infer that axis length (5 here).
x2 = x1.reshape(-1,1)
x2.shape

[15]:

(5, 1)

[16]:

# Transposing a 1-D array is a no-op, so this is the inner product of x1 with itself
# (a scalar), not an outer product.
x1 @ x1.T

[16]:

[17]:

# (5, 1) @ (1, 5): a column times a row gives the 5x5 outer product.
x2 @ x2.T

[17]:

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12],
       [ 0,  4,  8, 12, 16]])

Conditional replacement with where¶

[18]:

[18]:

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

[19]:

# Element-wise choice: 0 where the entry of x is even, 1 where it is odd.
# A new array is returned; x itself is not modified.
np.where(x % 2 == 0, 0, 1)

[19]:

array([[0, 1, 0, 1],
       [1, 1, 1, 1],
       [0, 1, 0, 1]])

Array creating functions¶

[20]:

# 2x3 array of zeros; the default dtype is float, hence the trailing dots in the output.
np.zeros((2,3))

[20]:

array([[0., 0., 0.],
       [0., 0., 0.]])

[21]:

# 2x3 array of ones; like np.zeros, the default dtype is float.
np.ones((2,3))

[21]:

array([[1., 1., 1.],
       [1., 1., 1.]])

[22]:

# The lambda is called once with whole coordinate arrays i (row index) and j (column index),
# not once per element; the coordinates are floats by default, so the result is float too.
np.fromfunction(lambda i, j: i*3+j, (2, 3))

[22]:

array([[0., 1., 2.],
       [3., 4., 5.]])

Reductions (margins)¶

[23]:

[23]:

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

[24]:

# No axis given: sum over every element and return a scalar.
x.sum()

[24]:

[25]:

# axis=0 collapses the rows, leaving one sum per column.
x.sum(axis=0)

[25]:

array([ 9, 11, 13, 15])

[26]:

# axis=1 collapses the columns, leaving one sum per row.
x.sum(axis=1)

[26]:

array([ 6,  4, 38])

Broadcasting¶

[27]:

# Shape of the full array, for comparison with the shapes of its margins.
x.shape

[27]:

(3, 4)

[28]:

# Column sums have shape (4,), which lines up with the last axis of x.
x.sum(axis=0).shape

[28]:

(4,)

[29]:

# (3, 4) / (4,) broadcasts the column sums across every row,
# so each column of the result sums to 1.
x / x.sum(axis=0)

[29]:

array([[0.        , 0.09090909, 0.15384615, 0.2       ],
       [0.11111111, 0.09090909, 0.07692308, 0.06666667],
       [0.88888889, 0.81818182, 0.76923077, 0.73333333]])

[30]:

# Row sums have shape (3,), which does not line up with the last axis (length 4) of x,
# so x / x.sum(axis=1) would fail to broadcast.
x.sum(axis=1).shape

[30]:

(3,)

[31]:

# keepdims=True keeps the reduced axis with length 1, giving a (3, 1) column.
x.sum(axis=1, keepdims=True).shape

[31]:

(3, 1)

[32]:

# (3, 4) / (3, 1) broadcasts the row sums across every column,
# so each row of the result sums to 1.
x / x.sum(axis=1, keepdims=True)

[32]:

array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])

[33]:

# Same result without keepdims: indexing with None inserts a length-1 axis, (3,) -> (3, 1).
x / x.sum(axis=1)[:, None]

[33]:

array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])

[34]:

# np.newaxis is an alias for None; this spelling states the intent more explicitly.
x / x.sum(axis=1)[:, np.newaxis]

[34]:

array([[0.        , 0.16666667, 0.33333333, 0.5       ],
       [0.25      , 0.25      , 0.25      , 0.25      ],
       [0.21052632, 0.23684211, 0.26315789, 0.28947368]])

Universal functions (ufunc)¶

[35]:

# A ufunc is applied element by element; the integer input yields a floating-point result.
np.sqrt(x)

[35]:

array([[0.        , 1.        , 1.41421356, 1.73205081],
       [1.        , 1.        , 1.        , 1.        ],
       [2.82842712, 3.        , 3.16227766, 3.31662479]])

Einstein summation notation¶

[36]:

# Two small integer matrices whose rows have the same length (2): a is 2x2, b is 3x2.
a = np.array([[1,2], [3,4]])
b = np.array([[3,4], [5,6], [7,8]])

[37]:

[37]:

array([[1, 2],
       [3, 4]])

[38]:

[38]:

array([[3, 4],
       [5, 6],
       [7, 8]])

[39]:

# Reference implementation with explicit loops: m[i, j] is the dot product of
# row i of a with row j of b (equivalent to a @ b.T).
# np.zeros allocates floats, so m is a float array even though a and b are integer.
m = np.zeros((a.shape[0], b.shape[0]))
for i, u in enumerate(a):
    for j, v in enumerate(b):
        # u and v are 1-D rows, so u @ v is a scalar inner product.
        m[i, j] = u @ v
m

[39]:

array([[11., 17., 23.],
       [25., 39., 53.]])

[40]:

# Same computation in index notation: the repeated index n is summed over, and the
# free indices i and j label the output axes. The integer dtype of the inputs is kept.
np.einsum('in,jn -> ij', a, b)

[40]:

array([[11, 17, 23],
       [25, 39, 53]])

Random module (c.f. Scipy)¶

[41]:

# 2x3 draws from a Poisson distribution with rate 3.
# No seed is set in the visible cells, so the numbers differ from run to run.
np.random.poisson(3, (2,3))

[41]:

array([[2, 4, 2],
       [4, 5, 2]])

[42]:

# 2x3 draws from a normal distribution with mean 0 and standard deviation 1.
np.random.normal(0, 1, (2,3))

[42]:

array([[ 0.30178899, -0.2008874 ,  0.9557239 ],
       [ 0.35686771,  0.56554302, -0.42485798]])

[43]:

# Given an integer n, returns the integers 0..n-1 in random order.
np.random.permutation(10)

[43]:

array([2, 8, 9, 1, 3, 7, 4, 5, 6, 0])

[44]:

# 4x5 array sampled from 'a', 'b', 'c'; by default sampling is uniform and with replacement.
np.random.choice(list('abc'), (4,5))

[44]:

array([['b', 'b', 'a', 'c', 'a'],
       ['b', 'a', 'b', 'c', 'c'],
       ['c', 'c', 'c', 'b', 'a'],
       ['c', 'b', 'c', 'a', 'b']], dtype='<U1')

Linear algebra submodule (c.f. Scipy)¶

[45]:

[45]:

array([[ 0,  1,  2,  3],
       [ 1,  1,  1,  1],
       [ 8,  9, 10, 11]])

[46]:

# Returns (U, s, Vh). With the default full_matrices=True, U is 3x3 and Vh is 4x4
# for this 3x4 input; s holds the singular values in descending order.
# The third singular value is zero up to rounding, i.e. x has rank 2.
np.linalg.svd(x)

[46]:

(array([[-0.16753774, -0.97815111, -0.12309149],
        [-0.10163439,  0.14132757, -0.98473193],
        [-0.98061285,  0.15246943,  0.12309149]]),
 array([1.95072080e+01, 1.86248100e+00, 3.47460824e-16]),
 array([[-0.40736415, -0.46622191, -0.52507967, -0.58393743],
        [ 0.73079029,  0.28746675, -0.15585679, -0.59918034],
        [-0.10532429, -0.26019684,  0.83636654, -0.47084542],
        [-0.53750051,  0.79517143,  0.02215866, -0.27982958]]))

[47]:

# Least-squares solution of x @ w = [0, 1, 2].
# Returns (solution, residuals, rank, singular values); residuals come back empty
# here because x is rank-deficient (rank 2).
# rcond=None selects the machine-precision-based cutoff for small singular values.
np.linalg.lstsq(x, np.arange(3), rcond=None)

[47]:

(array([ 0.21818182,  0.11818182,  0.01818182, -0.08181818]),
 array([], dtype=float64),
 2,
 array([1.95072080e+01, 1.86248100e+00, 2.73318015e-16]))

Masked array¶

[48]:

# Rebinds the name a (previously the 2x2 einsum operand) to a 4x5 array of 0..19.
a = np.arange(20).reshape((4,5))
a

[48]:

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

[49]:

# Boolean mask that is True at the even entries of a.
# It is built here, while a is still an all-integer array.
mask = np.ma.make_mask(a % 2 == 0)
mask

[49]:

array([[ True, False,  True, False,  True],
       [False,  True, False,  True, False],
       [ True, False,  True, False,  True],
       [False,  True, False,  True, False]])

[50]:

# Replace the odd entries with NaN. NaN is a float, so the result (rebound to a)
# is a float array.
a = np.where(a % 2 != 0, np.nan, a)

[51]:

[51]:

array([[ 0., nan,  2., nan,  4.],
       [nan,  6., nan,  8., nan],
       [10., nan, 12., nan, 14.],
       [nan, 16., nan, 18., nan]])

[52]:

# NaN propagates through arithmetic, so the plain sum is NaN.
np.sum(a)

[52]:

nan

[53]:

# Boolean indexing keeps only the entries where mask is True (the even values),
# so the NaNs are excluded from the sum.
np.sum(a[mask])

[53]:

90.0

Memory mapping¶

When you are working with arrays that are too large to fit in memory, you can use memmap to map an array on disk.

[54]:

# mode='w+' creates foo.dat (or overwrites an existing file), sized for a 10x10 float64 array.
fp = np.memmap('foo.dat', dtype=np.float64, mode='w+', shape=(10,10))

[55]:

# Slice assignment writes into the mapped buffer in place (rebinding fp would not);
# the integer values are stored as float64.
fp[:] = np.arange(100).reshape((10,10))

[56]:

# A memmap is displayed like an ordinary array; the values shown come from the mapping.
fp

[56]:

memmap([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14., 15., 16., 17., 18., 19.],
        [20., 21., 22., 23., 24., 25., 26., 27., 28., 29.],
        [30., 31., 32., 33., 34., 35., 36., 37., 38., 39.],
        [40., 41., 42., 43., 44., 45., 46., 47., 48., 49.],
        [50., 51., 52., 53., 54., 55., 56., 57., 58., 59.],
        [60., 61., 62., 63., 64., 65., 66., 67., 68., 69.],
        [70., 71., 72., 73., 74., 75., 76., 77., 78., 79.],
        [80., 81., 82., 83., 84., 85., 86., 87., 88., 89.],
        [90., 91., 92., 93., 94., 95., 96., 97., 98., 99.]])

[57]:

# Write any pending changes to foo.dat explicitly before dropping the name.
# `del fp` only removes this one reference: the memmap displayed by an earlier cell may
# still be held by IPython's output cache (Out[...] / _), in which case the object is not
# destroyed here and the flush that numpy performs on deletion would not run.
fp.flush()
del fp

[58]:

# Re-open the same file. The raw file carries no header, so dtype and shape are given again.
# With no mode argument, numpy's default 'r+' (read/write on an existing file) is used.
fp1 = np.memmap('foo.dat', dtype=np.float64, shape=(10,10))

[59]:

# Slicing a memmap gives another memmap; only the top-left 5x5 corner is selected here.
fp1[:5, :5]

[59]:

memmap([[ 0.,  1.,  2.,  3.,  4.],
        [10., 11., 12., 13., 14.],
        [20., 21., 22., 23., 24.],
        [30., 31., 32., 33., 34.],
        [40., 41., 42., 43., 44.]])

[60]:

# offset is in bytes: skip 75 float64 values (8 bytes each), then map the
# remaining 25 values as a 5x5 array.
fp2 = np.memmap('foo.dat', dtype=np.float64, offset=75*8, shape=(5,5))

[61]:

# Shows the values 75..99, i.e. the tail of the file viewed with the new 5x5 shape.
fp2

[61]:

memmap([[75., 76., 77., 78., 79.],
        [80., 81., 82., 83., 84.],
        [85., 86., 87., 88., 89.],
        [90., 91., 92., 93., 94.],
        [95., 96., 97., 98., 99.]])

[ ]: