Using numpy.random

In [1]:
import numpy as np

Random number generators

Continuous distributions

Uniform

In [4]:
np.random.rand(3,4)
Out[4]:
array([[ 0.1824764 ,  0.54965542,  0.47202485,  0.95005208],
       [ 0.94097831,  0.02402875,  0.63758556,  0.37598786],
       [ 0.15309177,  0.32923875,  0.36835565,  0.71566394]])
In [7]:
np.random.random_sample((3,4))
Out[7]:
array([[ 0.79982921,  0.49167894,  0.31763543,  0.80053871],
       [ 0.73086786,  0.82912843,  0.09356568,  0.25450986],
       [ 0.75515315,  0.06455766,  0.10129754,  0.08602764]])
In [9]:
np.random.uniform(0, 1, (3,4))
Out[9]:
array([[ 0.94293588,  0.51498122,  0.38971777,  0.59287419],
       [ 0.18248637,  0.02499295,  0.842292  ,  0.57454183],
       [ 0.81824035,  0.27704791,  0.62583653,  0.50282242]])

Normal

In [8]:
np.random.randn(3, 4)
Out[8]:
array([[ 0.60069026,  0.09858881, -1.4709185 , -0.70565061],
       [-0.46729956, -1.6822813 , -0.29514808,  1.80174322],
       [ 2.231532  ,  0.63953851, -0.78135108,  1.13137354]])
In [11]:
np.random.standard_normal((3,4))
Out[11]:
array([[-0.0417215 ,  1.02702489,  0.88650073,  0.3359106 ],
       [ 0.37837503,  1.37917244,  1.26210605, -1.01897335],
       [ 0.65942071,  0.24987618,  0.05533211, -1.13927286]])
In [10]:
np.random.normal(0, 1, (3,4))
Out[10]:
array([[-0.1300124 , -1.23111144,  0.82896809,  0.17212567],
       [-0.98791995, -0.23104989,  2.19710114, -1.26945484],
       [-0.69139685,  0.50303272,  1.17180097, -0.54426858]])

Discrete distributions

Uniform

In [16]:
np.random.randint(0, 10, (3,4))
Out[16]:
array([[6, 7, 7, 2],
       [9, 6, 3, 5],
       [2, 0, 4, 4]])

Binomial

In [17]:
np.random.binomial(10, 0.3, (3,4))
Out[17]:
array([[3, 1, 3, 5],
       [5, 2, 2, 3],
       [3, 1, 1, 1]])

Poisson

In [20]:
np.random.poisson(0.2, (3,4))
Out[20]:
array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 2, 0, 1]])

Multivariate distributions

Multinomial

You have 3 fair 6-sided dice. You roll each of them 10 times. How many times do the faces 1,2,3,4,5,6 appear?

In [92]:
np.random.multinomial(10, np.ones(6)/6, 3)
Out[92]:
array([[1, 3, 2, 1, 1, 2],
       [0, 1, 2, 2, 3, 2],
       [1, 2, 3, 0, 2, 2]])

Multivariate normal

In [112]:
x = np.array([[1, 2]]).T
sigma = x @ x.T
In [113]:
mu = np.array([5,5])
In [114]:
sigma
Out[114]:
array([[1, 2],
       [2, 4]])
In [115]:
np.random.multivariate_normal(mu, sigma, 3)
Out[115]:
array([[ 6.68175511,  8.3635102 ],
       [ 4.11165655,  3.22331307],
       [ 4.67467303,  4.3493461 ]])

Seed

In [22]:
np.random.seed(123)
np.random.randn(3,4)
Out[22]:
array([[-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471],
       [-0.57860025,  1.65143654, -2.42667924, -0.42891263],
       [ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])
In [23]:
np.random.seed(123)
np.random.randn(3,4)
Out[23]:
array([[-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471],
       [-0.57860025,  1.65143654, -2.42667924, -0.42891263],
       [ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])

RNG class

In [48]:
rng1 = np.random.RandomState(123)
In [49]:
rng1.normal(0, 1, (3,4))
Out[49]:
array([[-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471],
       [-0.57860025,  1.65143654, -2.42667924, -0.42891263],
       [ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])
In [50]:
rng1.normal(size=(3,4))
Out[50]:
array([[ 1.49138963, -0.638902  , -0.44398196, -0.43435128],
       [ 2.20593008,  2.18678609,  1.0040539 ,  0.3861864 ],
       [ 0.73736858,  1.49073203, -0.93583387,  1.17582904]])
In [51]:
rng2 = np.random.RandomState(123)
In [52]:
rng2.normal(0, 1, (3,4))
Out[52]:
array([[-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471],
       [-0.57860025,  1.65143654, -2.42667924, -0.42891263],
       [ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])
In [53]:
rng2.normal(size=(3,4))
Out[53]:
array([[ 1.49138963, -0.638902  , -0.44398196, -0.43435128],
       [ 2.20593008,  2.18678609,  1.0040539 ,  0.3861864 ],
       [ 0.73736858,  1.49073203, -0.93583387,  1.17582904]])

Choice

In [61]:
np.random.choice(4, size=10)
Out[61]:
array([2, 1, 3, 2, 0, 2, 2, 3, 0, 3])
In [63]:
np.random.choice([1,3,5,7], 10)
Out[63]:
array([7, 5, 7, 3, 7, 5, 3, 7, 3, 5])
In [65]:
np.random.choice(['H', 'T'], 10)
Out[65]:
array(['T', 'T', 'T', 'H', 'T', 'T', 'T', 'T', 'H', 'H'],
      dtype='<U1')
In [58]:
np.random.choice(list('ACTG'), 10)
Out[58]:
array(['A', 'G', 'T', 'C', 'A', 'G', 'T', 'T', 'T', 'T'],
      dtype='<U1')
In [60]:
''.join(np.random.choice(list('ACTG'), 10))
Out[60]:
'CTAGGGCATA'
In [70]:
x = np.arange(24).reshape((6,4))
In [71]:
x
Out[71]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])
In [75]:
ridx = np.random.choice(x.shape[0], 10)
x[ridx]
Out[75]:
array([[20, 21, 22, 23],
       [16, 17, 18, 19],
       [ 4,  5,  6,  7],
       [16, 17, 18, 19],
       [16, 17, 18, 19],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [12, 13, 14, 15],
       [ 0,  1,  2,  3],
       [20, 21, 22, 23]])
In [76]:
cidx = np.random.choice(x.shape[1], 10)
x[:, cidx]
Out[76]:
array([[ 0,  0,  3,  0,  0,  3,  0,  0,  1,  0],
       [ 4,  4,  7,  4,  4,  7,  4,  4,  5,  4],
       [ 8,  8, 11,  8,  8, 11,  8,  8,  9,  8],
       [12, 12, 15, 12, 12, 15, 12, 12, 13, 12],
       [16, 16, 19, 16, 16, 19, 16, 16, 17, 16],
       [20, 20, 23, 20, 20, 23, 20, 20, 21, 20]])

Permutations

In-place

In [77]:
x = np.arange(10)
In [78]:
np.random.shuffle(x)
In [79]:
x
Out[79]:
array([9, 1, 3, 6, 0, 5, 7, 2, 4, 8])

Make a copy

In [80]:
x = np.arange(10)
In [81]:
np.random.permutation(x)
Out[81]:
array([0, 3, 9, 1, 5, 4, 2, 7, 8, 6])

Can also give the size to get permutation indexes

In [82]:
np.random.permutation(10)
Out[82]:
array([2, 0, 6, 8, 9, 3, 4, 5, 7, 1])

Exercises

1. Simulate 100 tosses of a fair coin. What is the longest run of consecutive heads and where in the sequence does it occur?

In [146]:
tosses = np.random.choice(['H', 'T'], 100)
In [147]:
tosses
Out[147]:
array(['T', 'H', 'H', 'H', 'T', 'H', 'T', 'T', 'T', 'H', 'H', 'T', 'H',
       'T', 'H', 'T', 'T', 'T', 'T', 'H', 'T', 'H', 'H', 'H', 'T', 'H',
       'T', 'H', 'T', 'T', 'H', 'H', 'T', 'H', 'H', 'T', 'T', 'T', 'T',
       'T', 'H', 'H', 'T', 'T', 'T', 'H', 'T', 'H', 'T', 'T', 'H', 'H',
       'H', 'H', 'H', 'T', 'H', 'H', 'T', 'H', 'H', 'H', 'H', 'T', 'T',
       'T', 'T', 'H', 'H', 'H', 'T', 'T', 'H', 'T', 'H', 'H', 'H', 'H',
       'T', 'H', 'H', 'H', 'H', 'H', 'T', 'T', 'H', 'H', 'H', 'H', 'T',
       'T', 'H', 'T', 'H', 'H', 'H', 'T', 'H', 'T'],
      dtype='<U1')
In [156]:
def find_runs(xs, target='H'):
    count = 0
    max_count = 0
    idx = 0
    for i, x in enumerate(xs):
        if x == target:
            count += 1
        else:
            if count > max_count:
                max_count = count
                idx = i
            count = 0
    return max_count, idx
In [160]:
max_count, pos = find_runs(tosses)
max_count, pos
Out[160]:
(5, 55)
In [161]:
tosses[(pos-max_count-1):(pos+1)]
Out[161]:
array(['T', 'H', 'H', 'H', 'H', 'H', 'T'],
      dtype='<U1')

2. Simulate a series of 10 sets of tosses of a fair coin in which you only stop when you find 5 consecutive heads. Find the average number of coins tossed in the 10 sets.

In [150]:
import itertools as it
In [162]:
def expt(n=5):
    count = 0
    for i in it.count(1):
        toss = np.random.choice(list('HT'))
        if toss == 'H':
            count += 1
            if count == n:
                return i
In [171]:
np.mean([expt() for i in range(10)])
Out[171]:
9.3000000000000007

Direct method using negative binomial sampling

In [172]:
np.mean(5 + np.random.negative_binomial(5, 0.5, 10))
Out[172]:
10.800000000000001

3. A doubly stochastic matrix is one where each row and each column sums up to 1, and each cell value can be interpreted as a probability (i.e. it is between 0 and 1). Construct a doubly stochastic matrix of size (4,4) starting from a (4,4) matrix of standard random uniform samples.

In [173]:
x = np.random.random((4,4))
In [191]:
rsum = None
csum = None

while (np.any(rsum != 1)) | (np.any(csum != 1)):
    x /= x.sum(0)
    x = x / x.sum(1)[:, np.newaxis]
    rsum = x.sum(1)
    csum = x.sum(0)
In [192]:
x
Out[192]:
array([[ 0.04242593,  0.09297455,  0.597877  ,  0.26672252],
       [ 0.51742067,  0.07877105,  0.19201241,  0.21179587],
       [ 0.07656644,  0.50788136,  0.10556133,  0.30999087],
       [ 0.36358697,  0.32037303,  0.10454926,  0.21149075]])
In [193]:
x.sum(0)
Out[193]:
array([ 1.,  1.,  1.,  1.])
In [194]:
x.sum(1)
Out[194]:
array([ 1.,  1.,  1.,  1.])
In [ ]: