Using numpy.random¶
In [1]:
import numpy as np
Random number generators¶
Continuous distributions¶
Uniform¶
In [4]:
np.random.rand(3,4)
Out[4]:
array([[ 0.1824764 , 0.54965542, 0.47202485, 0.95005208],
[ 0.94097831, 0.02402875, 0.63758556, 0.37598786],
[ 0.15309177, 0.32923875, 0.36835565, 0.71566394]])
In [7]:
np.random.random_sample((3,4))
Out[7]:
array([[ 0.79982921, 0.49167894, 0.31763543, 0.80053871],
[ 0.73086786, 0.82912843, 0.09356568, 0.25450986],
[ 0.75515315, 0.06455766, 0.10129754, 0.08602764]])
In [9]:
np.random.uniform(0, 1, (3,4))
Out[9]:
array([[ 0.94293588, 0.51498122, 0.38971777, 0.59287419],
[ 0.18248637, 0.02499295, 0.842292 , 0.57454183],
[ 0.81824035, 0.27704791, 0.62583653, 0.50282242]])
Normal¶
In [8]:
np.random.randn(3, 4)
Out[8]:
array([[ 0.60069026, 0.09858881, -1.4709185 , -0.70565061],
[-0.46729956, -1.6822813 , -0.29514808, 1.80174322],
[ 2.231532 , 0.63953851, -0.78135108, 1.13137354]])
In [11]:
np.random.standard_normal((3,4))
Out[11]:
array([[-0.0417215 , 1.02702489, 0.88650073, 0.3359106 ],
[ 0.37837503, 1.37917244, 1.26210605, -1.01897335],
[ 0.65942071, 0.24987618, 0.05533211, -1.13927286]])
In [10]:
np.random.normal(0, 1, (3,4))
Out[10]:
array([[-0.1300124 , -1.23111144, 0.82896809, 0.17212567],
[-0.98791995, -0.23104989, 2.19710114, -1.26945484],
[-0.69139685, 0.50303272, 1.17180097, -0.54426858]])
Discrete distributions¶
Uniform¶
In [16]:
np.random.randint(0, 10, (3,4))
Out[16]:
array([[6, 7, 7, 2],
[9, 6, 3, 5],
[2, 0, 4, 4]])
Binomial¶
In [17]:
np.random.binomial(10, 0.3, (3,4))
Out[17]:
array([[3, 1, 3, 5],
[5, 2, 2, 3],
[3, 1, 1, 1]])
Poisson¶
In [20]:
np.random.poisson(0.2, (3,4))
Out[20]:
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 2, 0, 1]])
Multivariate distributions¶
Multinomial¶
You have 3 fair 6-sided dice. You roll each of them 10 times. How many times do the faces 1,2,3,4,5,6 appear?
In [92]:
np.random.multinomial(10, np.ones(6)/6, 3)
Out[92]:
array([[1, 3, 2, 1, 1, 2],
[0, 1, 2, 2, 3, 2],
[1, 2, 3, 0, 2, 2]])
Multivariate normal¶
In [112]:
x = np.array([[1, 2]]).T
sigma = x @ x.T
In [113]:
mu = np.array([5,5])
In [114]:
sigma
Out[114]:
array([[1, 2],
[2, 4]])
In [115]:
np.random.multivariate_normal(mu, sigma, 3)
Out[115]:
array([[ 6.68175511, 8.3635102 ],
[ 4.11165655, 3.22331307],
[ 4.67467303, 4.3493461 ]])
Seed¶
In [22]:
np.random.seed(123)
np.random.randn(3,4)
Out[22]:
array([[-1.0856306 , 0.99734545, 0.2829785 , -1.50629471],
[-0.57860025, 1.65143654, -2.42667924, -0.42891263],
[ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])
In [23]:
np.random.seed(123)
np.random.randn(3,4)
Out[23]:
array([[-1.0856306 , 0.99734545, 0.2829785 , -1.50629471],
[-0.57860025, 1.65143654, -2.42667924, -0.42891263],
[ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])
RNG class¶
In [48]:
rng1 = np.random.RandomState(123)
In [49]:
rng1.normal(0, 1, (3,4))
Out[49]:
array([[-1.0856306 , 0.99734545, 0.2829785 , -1.50629471],
[-0.57860025, 1.65143654, -2.42667924, -0.42891263],
[ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])
In [50]:
rng1.normal(size=(3,4))
Out[50]:
array([[ 1.49138963, -0.638902 , -0.44398196, -0.43435128],
[ 2.20593008, 2.18678609, 1.0040539 , 0.3861864 ],
[ 0.73736858, 1.49073203, -0.93583387, 1.17582904]])
In [51]:
rng2 = np.random.RandomState(123)
In [52]:
rng2.normal(0, 1, (3,4))
Out[52]:
array([[-1.0856306 , 0.99734545, 0.2829785 , -1.50629471],
[-0.57860025, 1.65143654, -2.42667924, -0.42891263],
[ 1.26593626, -0.8667404 , -0.67888615, -0.09470897]])
In [53]:
rng2.normal(size=(3,4))
Out[53]:
array([[ 1.49138963, -0.638902 , -0.44398196, -0.43435128],
[ 2.20593008, 2.18678609, 1.0040539 , 0.3861864 ],
[ 0.73736858, 1.49073203, -0.93583387, 1.17582904]])
Choice¶
In [61]:
np.random.choice(4, size=10)
Out[61]:
array([2, 1, 3, 2, 0, 2, 2, 3, 0, 3])
In [63]:
np.random.choice([1,3,5,7], 10)
Out[63]:
array([7, 5, 7, 3, 7, 5, 3, 7, 3, 5])
In [65]:
np.random.choice(['H', 'T'], 10)
Out[65]:
array(['T', 'T', 'T', 'H', 'T', 'T', 'T', 'T', 'H', 'H'],
dtype='<U1')
In [58]:
np.random.choice(list('ACTG'), 10)
Out[58]:
array(['A', 'G', 'T', 'C', 'A', 'G', 'T', 'T', 'T', 'T'],
dtype='<U1')
In [60]:
''.join(np.random.choice(list('ACTG'), 10))
Out[60]:
'CTAGGGCATA'
In [70]:
x = np.arange(24).reshape((6,4))
In [71]:
x
Out[71]:
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]])
In [75]:
ridx = np.random.choice(x.shape[0], 10)
x[ridx]
Out[75]:
array([[20, 21, 22, 23],
[16, 17, 18, 19],
[ 4, 5, 6, 7],
[16, 17, 18, 19],
[16, 17, 18, 19],
[12, 13, 14, 15],
[16, 17, 18, 19],
[12, 13, 14, 15],
[ 0, 1, 2, 3],
[20, 21, 22, 23]])
In [76]:
cidx = np.random.choice(x.shape[1], 10)
x[:, cidx]
Out[76]:
array([[ 0, 0, 3, 0, 0, 3, 0, 0, 1, 0],
[ 4, 4, 7, 4, 4, 7, 4, 4, 5, 4],
[ 8, 8, 11, 8, 8, 11, 8, 8, 9, 8],
[12, 12, 15, 12, 12, 15, 12, 12, 13, 12],
[16, 16, 19, 16, 16, 19, 16, 16, 17, 16],
[20, 20, 23, 20, 20, 23, 20, 20, 21, 20]])
Permutations¶
In-place¶
In [77]:
x = np.arange(10)
In [78]:
np.random.shuffle(x)
In [79]:
x
Out[79]:
array([9, 1, 3, 6, 0, 5, 7, 2, 4, 8])
Exercises¶
1. Simulate 100 tosses of a fair coin. What is the longest run of consecutive heads and where in the sequence does it occur?
In [146]:
tosses = np.random.choice(['H', 'T'], 100)
In [147]:
tosses
Out[147]:
array(['T', 'H', 'H', 'H', 'T', 'H', 'T', 'T', 'T', 'H', 'H', 'T', 'H',
'T', 'H', 'T', 'T', 'T', 'T', 'H', 'T', 'H', 'H', 'H', 'T', 'H',
'T', 'H', 'T', 'T', 'H', 'H', 'T', 'H', 'H', 'T', 'T', 'T', 'T',
'T', 'H', 'H', 'T', 'T', 'T', 'H', 'T', 'H', 'T', 'T', 'H', 'H',
'H', 'H', 'H', 'T', 'H', 'H', 'T', 'H', 'H', 'H', 'H', 'T', 'T',
'T', 'T', 'H', 'H', 'H', 'T', 'T', 'H', 'T', 'H', 'H', 'H', 'H',
'T', 'H', 'H', 'H', 'H', 'H', 'T', 'T', 'H', 'H', 'H', 'H', 'T',
'T', 'H', 'T', 'H', 'H', 'H', 'T', 'H', 'T'],
dtype='<U1')
In [156]:
def find_runs(xs, target='H'):
count = 0
max_count = 0
idx = 0
for i, x in enumerate(xs):
if x == target:
count += 1
else:
if count > max_count:
max_count = count
idx = i
count = 0
return max_count, idx
In [160]:
max_count, pos = find_runs(tosses)
max_count, pos
Out[160]:
(5, 55)
In [161]:
tosses[(pos-max_count-1):(pos+1)]
Out[161]:
array(['T', 'H', 'H', 'H', 'H', 'H', 'T'],
dtype='<U1')
2. Simulate a series of 10 sets of tosses of a fair coin in which you only stop when you find 5 consecutive heads. Find the average number of coins tossed in the 10 sets.
In [150]:
import itertools as it
In [162]:
def expt(n=5):
count = 0
for i in it.count(1):
toss = np.random.choice(list('HT'))
if toss == 'H':
count += 1
if count == n:
return i
In [171]:
np.mean([expt() for i in range(10)])
Out[171]:
9.3000000000000007
Direct method using negative binomial sampling
In [172]:
np.mean(5 + np.random.negative_binomial(5, 0.5, 10))
Out[172]:
10.800000000000001
3. A doubly stochastic matrix is one where each row and each column sums up to 1, and each cell value can be interpreted as a probability (i.e. it is between 0 and 1). Construct a doubly stochastic matrix of size (4,4) starting from a (4,4) matrix of standard random uniform samples.
In [173]:
x = np.random.random((4,4))
In [191]:
rsum = None
csum = None
while (np.any(rsum != 1)) | (np.any(csum != 1)):
x /= x.sum(0)
x = x / x.sum(1)[:, np.newaxis]
rsum = x.sum(1)
csum = x.sum(0)
In [192]:
x
Out[192]:
array([[ 0.04242593, 0.09297455, 0.597877 , 0.26672252],
[ 0.51742067, 0.07877105, 0.19201241, 0.21179587],
[ 0.07656644, 0.50788136, 0.10556133, 0.30999087],
[ 0.36358697, 0.32037303, 0.10454926, 0.21149075]])
In [193]:
x.sum(0)
Out[193]:
array([ 1., 1., 1., 1.])
In [194]:
x.sum(1)
Out[194]:
array([ 1., 1., 1., 1.])
In [ ]: