Cython¶
Cython is an “optimizing static compiler” that combines Python with C to generate optimized code. Since Cython is a superset of Python, all valid Python programs are also valid Cython programs. However, by providing hints and static typing, we can get much faster programs. Note that while numba
often provides similar speedups with less work,, an advantage of Cython is that it is easy to distribute optimized Cython modules since they can be built with the standard Python setup.py
script.
We have already seen how to use Cython to wrap C and C++ functions from existing libraries. Here we will see how to use Cython to speed up Python functions.
[1]:
%matplotlib inline
import matplotlib.pyplot as plt
Resources¶
Utility function for timing functions
[2]:
import time
[3]:
def timer(f, *args, **kwargs):
start = time.clock()
ans = f(*args, **kwargs)
return ans, time.clock() - start
[4]:
def report(fs, *args, **kwargs):
ans, t = timer(fs[0], *args, **kwargs)
for f in fs[1:]:
ans_, t_ = timer(f, *args, **kwargs)
print('%s: %.1f' % (f.__name__, t/t_))
Incremental improvements¶
Generally, we start with a pure Python function, run it through Cython with the annotate -a
flag, and incrementally modify the code until the yellow parts are minimized.
How to build Cython modules¶
Using Cython consists of these steps:
Write a .pyx source file
Run the Cython compiler to generate a C file
Run a C compiler to generate a compiled library
Run the Python interpreter and ask it to import the module
If you are developing a package and want to Cythonize some or all functions, you will need to follow the steps above. Refer to official docs - the most relevant information is here. You should try to build a trivial package with Cython.
In the Jupyter notebook, we can use the %%cython
cell magic to automate these steps.
[5]:
%load_ext cython
Matrix multiplication example¶
[6]:
def matrix_multiply(u, v, res):
m, n = u.shape
n, p = v.shape
for i in range(m):
for j in range(p):
res[i,j] = 0
for k in range(n):
res[i,j] += u[i,k] * v[k,j]
return res
[7]:
import numpy as np
u = np.random.random((10,20))
v = np.random.random((20,5))
[8]:
res = np.zeros((u.shape[0], v.shape[1]))
matrix_multiply(u, v, res)
[8]:
array([[5.28819857, 4.92009694, 6.19984975, 4.96502503, 5.4413764 ],
[5.24114621, 5.43029087, 6.94559647, 5.07593234, 5.5442953 ],
[4.5385604 , 4.64487102, 5.14351158, 3.18284368, 4.04290799],
[5.68621682, 5.0272838 , 5.7722984 , 4.78287909, 5.68644012],
[5.40108532, 5.37948773, 6.67033692, 4.60293497, 6.21043122],
[3.69523498, 5.0616336 , 5.31608148, 3.93660024, 5.16258372],
[4.58358545, 3.70848199, 5.03566061, 3.96088381, 4.82956606],
[4.52492311, 5.3097423 , 5.25643487, 3.98634609, 5.76239054],
[5.60202322, 5.60757914, 6.88260326, 5.57151343, 7.10723609],
[3.97700115, 4.61827062, 4.65388262, 3.99595984, 4.92395776]])
[9]:
res = np.zeros((u.shape[0], v.shape[1]))
%timeit -r3 -n3 matrix_multiply(u, v, res)
1.07 ms ± 37.1 µs per loop (mean ± std. dev. of 3 runs, 3 loops each)
Using Cython annnotations to identify bottlenecks¶
[10]:
%%cython -a
import numpy as np
def matrix_multiply1(u, v, res):
m, n = u.shape
n, p = v.shape
for i in range(m):
for j in range(p):
res[i,j] = 0
for k in range(n):
res[i,j] += u[i,k] * v[k,j]
return res
[10]:
Generated by Cython 0.29.14
Yellow lines hint at Python interaction.
Click on a line that starts with a "+
" to see the C code that Cython generated for it.
01:
+02: import numpy as np
__pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
03:
+04: def matrix_multiply1(u, v, res):
/* Python wrapper */ static PyObject *__pyx_pw_46_cython_magic_4bee177fe8017f647c6514633a07411f_1matrix_multiply1(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyMethodDef __pyx_mdef_46_cython_magic_4bee177fe8017f647c6514633a07411f_1matrix_multiply1 = {"matrix_multiply1", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_46_cython_magic_4bee177fe8017f647c6514633a07411f_1matrix_multiply1, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_46_cython_magic_4bee177fe8017f647c6514633a07411f_1matrix_multiply1(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_u = 0; PyObject *__pyx_v_v = 0; PyObject *__pyx_v_res = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("matrix_multiply1 (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_u,&__pyx_n_s_v,&__pyx_n_s_res,0}; PyObject* values[3] = {0,0,0}; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); CYTHON_FALLTHROUGH; case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); CYTHON_FALLTHROUGH; case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } kw_args = PyDict_Size(__pyx_kwds); switch (pos_args) { case 0: if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_u)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; CYTHON_FALLTHROUGH; case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_v)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("matrix_multiply1", 1, 3, 3, 1); __PYX_ERR(0, 4, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_res)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("matrix_multiply1", 1, 3, 3, 2); __PYX_ERR(0, 4, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "matrix_multiply1") < 0)) __PYX_ERR(0, 4, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; } else { values[0] = PyTuple_GET_ITEM(__pyx_args, 0); values[1] = PyTuple_GET_ITEM(__pyx_args, 1); values[2] = PyTuple_GET_ITEM(__pyx_args, 2); } __pyx_v_u = values[0]; __pyx_v_v = values[1]; __pyx_v_res = values[2]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("matrix_multiply1", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 4, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("_cython_magic_4bee177fe8017f647c6514633a07411f.matrix_multiply1", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_46_cython_magic_4bee177fe8017f647c6514633a07411f_matrix_multiply1(__pyx_self, __pyx_v_u, __pyx_v_v, __pyx_v_res); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_46_cython_magic_4bee177fe8017f647c6514633a07411f_matrix_multiply1(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_u, PyObject *__pyx_v_v, PyObject *__pyx_v_res) { PyObject *__pyx_v_m = NULL; PyObject *__pyx_v_n = NULL; PyObject *__pyx_v_p = NULL; PyObject *__pyx_v_i = NULL; PyObject *__pyx_v_j = NULL; PyObject *__pyx_v_k = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("matrix_multiply1", 0); /* … */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_4); __Pyx_XDECREF(__pyx_t_12); __Pyx_XDECREF(__pyx_t_13); __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_15); __Pyx_AddTraceback("_cython_magic_4bee177fe8017f647c6514633a07411f.matrix_multiply1", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_m); __Pyx_XDECREF(__pyx_v_n); __Pyx_XDECREF(__pyx_v_p); __Pyx_XDECREF(__pyx_v_i); __Pyx_XDECREF(__pyx_v_j); __Pyx_XDECREF(__pyx_v_k); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_tuple_ = PyTuple_Pack(9, __pyx_n_s_u, __pyx_n_s_v, __pyx_n_s_res, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_p, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 4, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); /* … */ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_4bee177fe8017f647c6514633a07411f_1matrix_multiply1, NULL, __pyx_n_s_cython_magic_4bee177fe8017f647c); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 4, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_matrix_multiply1, __pyx_t_1) < 0) __PYX_ERR(0, 4, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+05: m, n = u.shape
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_u, __pyx_n_s_shape); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if ((likely(PyTuple_CheckExact(__pyx_t_1))) || (PyList_CheckExact(__pyx_t_1))) { PyObject* sequence = __pyx_t_1; Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); __PYX_ERR(0, 5, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { __pyx_t_2 = PyTuple_GET_ITEM(sequence, 0); __pyx_t_3 = PyTuple_GET_ITEM(sequence, 1); } else { __pyx_t_2 = PyList_GET_ITEM(sequence, 0); __pyx_t_3 = PyList_GET_ITEM(sequence, 1); } __Pyx_INCREF(__pyx_t_2); __Pyx_INCREF(__pyx_t_3); #else __pyx_t_2 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { Py_ssize_t index = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_5 = Py_TYPE(__pyx_t_4)->tp_iternext; index = 0; __pyx_t_2 = __pyx_t_5(__pyx_t_4); if (unlikely(!__pyx_t_2)) goto __pyx_L3_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); index = 1; __pyx_t_3 = __pyx_t_5(__pyx_t_4); if (unlikely(!__pyx_t_3)) goto __pyx_L3_unpacking_failed; __Pyx_GOTREF(__pyx_t_3); if (__Pyx_IternextUnpackEndCheck(__pyx_t_5(__pyx_t_4), 2) < 0) __PYX_ERR(0, 5, __pyx_L1_error) __pyx_t_5 = NULL; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; goto __pyx_L4_unpacking_done; __pyx_L3_unpacking_failed:; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_5 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); __PYX_ERR(0, 5, __pyx_L1_error) __pyx_L4_unpacking_done:; } __pyx_v_m = __pyx_t_2; __pyx_t_2 = 0; __pyx_v_n = __pyx_t_3; __pyx_t_3 = 0;
+06: n, p = v.shape
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_v, __pyx_n_s_shape); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if ((likely(PyTuple_CheckExact(__pyx_t_1))) || (PyList_CheckExact(__pyx_t_1))) { PyObject* sequence = __pyx_t_1; Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); __PYX_ERR(0, 6, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); __pyx_t_2 = PyTuple_GET_ITEM(sequence, 1); } else { __pyx_t_3 = PyList_GET_ITEM(sequence, 0); __pyx_t_2 = PyList_GET_ITEM(sequence, 1); } __Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_2); #else __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); #endif __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { Py_ssize_t index = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_5 = Py_TYPE(__pyx_t_4)->tp_iternext; index = 0; __pyx_t_3 = __pyx_t_5(__pyx_t_4); if (unlikely(!__pyx_t_3)) goto __pyx_L5_unpacking_failed; __Pyx_GOTREF(__pyx_t_3); index = 1; __pyx_t_2 = __pyx_t_5(__pyx_t_4); if (unlikely(!__pyx_t_2)) goto __pyx_L5_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); if (__Pyx_IternextUnpackEndCheck(__pyx_t_5(__pyx_t_4), 2) < 0) __PYX_ERR(0, 6, __pyx_L1_error) __pyx_t_5 = NULL; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; goto __pyx_L6_unpacking_done; __pyx_L5_unpacking_failed:; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_5 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); __PYX_ERR(0, 6, __pyx_L1_error) __pyx_L6_unpacking_done:; } __Pyx_DECREF_SET(__pyx_v_n, __pyx_t_3); __pyx_t_3 = 0; __pyx_v_p = __pyx_t_2; __pyx_t_2 = 0;
+07: for i in range(m):
__pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_range, __pyx_v_m); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_2 = __pyx_t_1; __Pyx_INCREF(__pyx_t_2); __pyx_t_6 = 0; __pyx_t_7 = NULL; } else { __pyx_t_6 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_7 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 7, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if (likely(!__pyx_t_7)) { if (likely(PyList_CheckExact(__pyx_t_2))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 7, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_1); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 7, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_7(__pyx_t_2); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 7, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_1); __pyx_t_1 = 0; /* … */ } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+08: for j in range(p):
__pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_range, __pyx_v_p); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { __pyx_t_8 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 8, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_9 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 8, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if (likely(!__pyx_t_9)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_8); __Pyx_INCREF(__pyx_t_1); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 8, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_8); __Pyx_INCREF(__pyx_t_1); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 8, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_9(__pyx_t_3); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 8, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_j, __pyx_t_1); __pyx_t_1 = 0; /* … */ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+09: res[i,j] = 0
__pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_i); __Pyx_GIVEREF(__pyx_v_i); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_i); __Pyx_INCREF(__pyx_v_j); __Pyx_GIVEREF(__pyx_v_j); PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_j); if (unlikely(PyObject_SetItem(__pyx_v_res, __pyx_t_1, __pyx_int_0) < 0)) __PYX_ERR(0, 9, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+10: for k in range(n):
__pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_range, __pyx_v_n); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_4 = __pyx_t_1; __Pyx_INCREF(__pyx_t_4); __pyx_t_10 = 0; __pyx_t_11 = NULL; } else { __pyx_t_10 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_11 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 10, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { if (likely(!__pyx_t_11)) { if (likely(PyList_CheckExact(__pyx_t_4))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_4)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_10); __Pyx_INCREF(__pyx_t_1); __pyx_t_10++; if (unlikely(0 < 0)) __PYX_ERR(0, 10, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_10); __Pyx_INCREF(__pyx_t_1); __pyx_t_10++; if (unlikely(0 < 0)) __PYX_ERR(0, 10, __pyx_L1_error) #else __pyx_t_1 = PySequence_ITEM(__pyx_t_4, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } } else { __pyx_t_1 = __pyx_t_11(__pyx_t_4); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 10, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_1); } __Pyx_XDECREF_SET(__pyx_v_k, __pyx_t_1); __pyx_t_1 = 0; /* … */ } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+11: res[i,j] += u[i,k] * v[k,j]
__pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_i); __Pyx_GIVEREF(__pyx_v_i); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_i); __Pyx_INCREF(__pyx_v_j); __Pyx_GIVEREF(__pyx_v_j); PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_v_j); __pyx_t_12 = __Pyx_PyObject_GetItem(__pyx_v_res, __pyx_t_1); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_12); __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_INCREF(__pyx_v_i); __Pyx_GIVEREF(__pyx_v_i); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_v_i); __Pyx_INCREF(__pyx_v_k); __Pyx_GIVEREF(__pyx_v_k); PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_v_k); __pyx_t_14 = __Pyx_PyObject_GetItem(__pyx_v_u, __pyx_t_13); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_INCREF(__pyx_v_k); __Pyx_GIVEREF(__pyx_v_k); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_v_k); __Pyx_INCREF(__pyx_v_j); __Pyx_GIVEREF(__pyx_v_j); PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_v_j); __pyx_t_15 = __Pyx_PyObject_GetItem(__pyx_v_v, __pyx_t_13); if (unlikely(!__pyx_t_15)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __pyx_t_13 = PyNumber_Multiply(__pyx_t_14, __pyx_t_15); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __pyx_t_15 = PyNumber_InPlaceAdd(__pyx_t_12, __pyx_t_13); if (unlikely(!__pyx_t_15)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; if (unlikely(PyObject_SetItem(__pyx_v_res, __pyx_t_1, __pyx_t_15) < 0)) __PYX_ERR(0, 11, __pyx_L1_error) __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+12: return res
__Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_res); __pyx_r = __pyx_v_res; goto __pyx_L0;
Using Cython cdefs and directives¶
cdef
defines values and functions to be called from Ccpdef
defines functions that can be called from C or Pythoncython.function
functions or decorators provide directives for how the code is compiledFor arrays, Cython uses typed memory views
cimport
imports C libraries that can be used directly in Cython codeTo use general C functions from a header file X, use
cdef extern from "X.h
and declare the function signatureYou can use Cython to wrap external C or C++ libraries, but this is not covered in the course. Instead, we will explore how to use
pybind11
to do that.
[11]:
%%cython -a
import cython
@cython.boundscheck(False)
@cython.wraparound(False)
def matrix_multiply1(double[:,:] u, double[:, :] v, double[:, :] res):
cdef int i, j, k
cdef int m, n, p
m = u.shape[0]
n = u.shape[1]
p = v.shape[1]
with cython.nogil:
for i in range(m):
for j in range(p):
res[i,j] = 0
for k in range(n):
res[i,j] += u[i,k] * v[k,j]
[11]:
Generated by Cython 0.29.14
Yellow lines hint at Python interaction.
Click on a line that starts with a "+
" to see the C code that Cython generated for it.
01:
+02: import cython
__pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
03:
04: @cython.boundscheck(False)
05: @cython.wraparound(False)
+06: def matrix_multiply1(double[:,:] u, double[:, :] v, double[:, :] res):
/* Python wrapper */ static PyObject *__pyx_pw_46_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22_1matrix_multiply1(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyMethodDef __pyx_mdef_46_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22_1matrix_multiply1 = {"matrix_multiply1", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_46_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22_1matrix_multiply1, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_46_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22_1matrix_multiply1(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { __Pyx_memviewslice __pyx_v_u = { 0, 0, { 0 }, { 0 }, { 0 } }; __Pyx_memviewslice __pyx_v_v = { 0, 0, { 0 }, { 0 }, { 0 } }; __Pyx_memviewslice __pyx_v_res = { 0, 0, { 0 }, { 0 }, { 0 } }; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("matrix_multiply1 (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_u,&__pyx_n_s_v,&__pyx_n_s_res,0}; PyObject* values[3] = {0,0,0}; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); CYTHON_FALLTHROUGH; case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); CYTHON_FALLTHROUGH; case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } kw_args = PyDict_Size(__pyx_kwds); switch (pos_args) { case 0: if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_u)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; CYTHON_FALLTHROUGH; case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_v)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("matrix_multiply1", 1, 3, 3, 1); __PYX_ERR(0, 6, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_res)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("matrix_multiply1", 1, 3, 3, 2); __PYX_ERR(0, 6, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "matrix_multiply1") < 0)) __PYX_ERR(0, 6, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; } else { values[0] = PyTuple_GET_ITEM(__pyx_args, 0); values[1] = PyTuple_GET_ITEM(__pyx_args, 1); values[2] = PyTuple_GET_ITEM(__pyx_args, 2); } __pyx_v_u = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[0], PyBUF_WRITABLE); if (unlikely(!__pyx_v_u.memview)) __PYX_ERR(0, 6, __pyx_L3_error) __pyx_v_v = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[1], PyBUF_WRITABLE); if (unlikely(!__pyx_v_v.memview)) __PYX_ERR(0, 6, __pyx_L3_error) __pyx_v_res = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[2], PyBUF_WRITABLE); if (unlikely(!__pyx_v_res.memview)) __PYX_ERR(0, 6, __pyx_L3_error) } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("matrix_multiply1", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 6, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22.matrix_multiply1", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_46_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22_matrix_multiply1(__pyx_self, __pyx_v_u, __pyx_v_v, __pyx_v_res); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_46_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22_matrix_multiply1(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_u, __Pyx_memviewslice __pyx_v_v, __Pyx_memviewslice __pyx_v_res) { int __pyx_v_i; int __pyx_v_j; int __pyx_v_k; int __pyx_v_m; int __pyx_v_n; int __pyx_v_p; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("matrix_multiply1", 0); /* … */ /* function exit code */ __pyx_r = Py_None; __Pyx_INCREF(Py_None); __PYX_XDEC_MEMVIEW(&__pyx_v_u, 1); __PYX_XDEC_MEMVIEW(&__pyx_v_v, 1); __PYX_XDEC_MEMVIEW(&__pyx_v_res, 1); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_tuple__19 = PyTuple_Pack(9, __pyx_n_s_u, __pyx_n_s_v, __pyx_n_s_res, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_p); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); /* … */ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_25e12f8b9e6b0f4600f6e96a3185ac22_1matrix_multiply1, NULL, __pyx_n_s_cython_magic_25e12f8b9e6b0f4600); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_matrix_multiply1, __pyx_t_1) < 0) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(3, 0, 9, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jovyan_cache_ipython_cytho, __pyx_n_s_matrix_multiply1, 6, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 6, __pyx_L1_error)
07: cdef int i, j, k
08: cdef int m, n, p
09:
+10: m = u.shape[0]
__pyx_v_m = (__pyx_v_u.shape[0]);
+11: n = u.shape[1]
__pyx_v_n = (__pyx_v_u.shape[1]);
+12: p = v.shape[1]
__pyx_v_p = (__pyx_v_v.shape[1]);
13:
+14: with cython.nogil:
{ #ifdef WITH_THREAD PyThreadState *_save; Py_UNBLOCK_THREADS __Pyx_FastGIL_Remember(); #endif /*try:*/ { /* … */ /*finally:*/ { /*normal exit:*/{ #ifdef WITH_THREAD __Pyx_FastGIL_Forget(); Py_BLOCK_THREADS #endif goto __pyx_L5; } __pyx_L5:; } }
+15: for i in range(m):
__pyx_t_1 = __pyx_v_m; __pyx_t_2 = __pyx_t_1; for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3;
+16: for j in range(p):
__pyx_t_4 = __pyx_v_p; __pyx_t_5 = __pyx_t_4; for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_j = __pyx_t_6;
+17: res[i,j] = 0
__pyx_t_7 = __pyx_v_i; __pyx_t_8 = __pyx_v_j; *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_res.data + __pyx_t_7 * __pyx_v_res.strides[0]) ) + __pyx_t_8 * __pyx_v_res.strides[1]) )) = 0.0;
+18: for k in range(n):
__pyx_t_9 = __pyx_v_n; __pyx_t_10 = __pyx_t_9; for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_10; __pyx_t_11+=1) { __pyx_v_k = __pyx_t_11;
+19: res[i,j] += u[i,k] * v[k,j]
__pyx_t_12 = __pyx_v_i; __pyx_t_13 = __pyx_v_k; __pyx_t_14 = __pyx_v_k; __pyx_t_15 = __pyx_v_j; __pyx_t_16 = __pyx_v_i; __pyx_t_17 = __pyx_v_j; *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_res.data + __pyx_t_16 * __pyx_v_res.strides[0]) ) + __pyx_t_17 * __pyx_v_res.strides[1]) )) += ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_u.data + __pyx_t_12 * __pyx_v_u.strides[0]) ) + __pyx_t_13 * __pyx_v_u.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_v.data + __pyx_t_14 * __pyx_v_v.strides[0]) ) + __pyx_t_15 * __pyx_v_v.strides[1]) )))); } } } }
[12]:
res = np.zeros((u.shape[0], v.shape[1]))
%timeit -r3 -n3 matrix_multiply1(u, v, res)
The slowest run took 6.47 times longer than the fastest. This could mean that an intermediate result is being cached.
27 µs ± 17.7 µs per loop (mean ± std. dev. of 3 runs, 3 loops each)
Parallel execution with Cython¶
Will not work unless OpenMP is installed. Note that we explicitly turn off the GIL with cython.nogil
[13]:
%%cython --compile-args=-fopenmp --link-args=-fopenmp --force -I /usr/local/opt/libomp/include -L /usr/local/opt/libomp/lib
import cython
from cython.parallel import parallel, prange
@cython.boundscheck(False)
@cython.wraparound(False)
def matrix_multiply2(double[:,:] u, double[:, :] v, double[:, :] res):
cdef int i, j, k
cdef int m, n, p
m = u.shape[0]
n = u.shape[1]
p = v.shape[1]
with cython.nogil, parallel():
for i in prange(m):
for j in prange(p):
res[i,j] = 0
for k in range(n):
res[i,j] += u[i,k] * v[k,j]
[14]:
res = np.zeros((u.shape[0], v.shape[1]))
%timeit -r3 -n3 matrix_multiply2(u, v, res)
10.2 ms ± 1.63 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)
Speeding up Mandelbrot set visualizations¶
[15]:
import time
[16]:
# color function for point at (x, y)
def mandel(x, y, max_iters):
c = complex(x, y)
z = 0.0j
for i in range(max_iters):
z = z*z + c
if z.real*z.real + z.imag*z.imag >= 4:
return i
return max_iters
def create_fractal(xmin, xmax, ymin, ymax, image, iters):
height, width = image.shape
pixel_size_x = (xmax - xmin)/width
pixel_size_y = (ymax - ymin)/height
for x in range(width):
real = xmin + x*pixel_size_x
for y in range(height):
imag = ymin + y*pixel_size_y
color = mandel(real, imag, iters)
image[y, x] = color
[17]:
gimage = np.zeros((1024, 1536), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
start = time.time()
create_fractal(xmin, xmax, ymin, ymax, gimage, iters)
dt = time.time() - start
print("Mandelbrot created on CPU in %f s" % dt)
plt.grid(False)
plt.imshow(gimage, cmap='jet')
pass
Mandelbrot created on CPU in 16.704039 s
Note the use of mandel_cython
as a helper function that is only invoved in C, not in Python directly. For such functions, it is important to specify the return type.
[18]:
%%cython -a
cimport cython
cdef extern from "complex.h":
double cabs(double complex)
# color function for point at (x, y)
cdef unsigned char mandel_cython(double x, double y, int max_iters):
cdef double complex c, z
c = x + y*1j
z = 0.0j
for i in range(max_iters):
z = z*z + c
if cabs(z) >= 2:
return i
return max_iters
@cython.cdivision(True)
def create_fractal_cython(double xmin, double xmax, double ymin, double ymax, unsigned char[:, :] image, int iters):
cdef int x, y
cdef int height, width
cdef double pixel_size_x, pixel_size_y
cdef double real, imag
cdef unsigned char color
height = image.shape[0]
width = image.shape[1]
pixel_size_x = (xmax - xmin)/width
pixel_size_y = (ymax - ymin)/height
for x in range(width):
real = xmin + x*pixel_size_x
for y in range(height):
imag = ymin + y*pixel_size_y
color = mandel_cython(real, imag, iters)
image[y, x] = color
[18]:
Generated by Cython 0.29.14
Yellow lines hint at Python interaction.
Click on a line that starts with a "+
" to see the C code that Cython generated for it.
01:
+02: cimport cython
__pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
03:
04: cdef extern from "complex.h":
05: double cabs(double complex)
06:
07: # color function for point at (x, y)
+08: cdef unsigned char mandel_cython(double x, double y, int max_iters):
static unsigned char __pyx_f_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_mandel_cython(double __pyx_v_x, double __pyx_v_y, int __pyx_v_max_iters) { __pyx_t_double_complex __pyx_v_c; __pyx_t_double_complex __pyx_v_z; int __pyx_v_i; unsigned char __pyx_r; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("mandel_cython", 0); /* … */ /* function exit code */ __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; }
09: cdef double complex c, z
10:
+11: c = x + y*1j
__pyx_v_c = __Pyx_c_sum_double(__pyx_t_double_complex_from_parts(__pyx_v_x, 0), __Pyx_c_prod_double(__pyx_t_double_complex_from_parts(__pyx_v_y, 0), __pyx_t_double_complex_from_parts(0, 1.0)));
+12: z = 0.0j
__pyx_v_z = __pyx_t_double_complex_from_parts(0, 0.0);
+13: for i in range(max_iters):
__pyx_t_1 = __pyx_v_max_iters; __pyx_t_2 = __pyx_t_1; for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3;
+14: z = z*z + c
__pyx_v_z = __Pyx_c_sum_double(__Pyx_c_prod_double(__pyx_v_z, __pyx_v_z), __pyx_v_c);
+15: if cabs(z) >= 2:
__pyx_t_4 = ((cabs(__pyx_v_z) >= 2.0) != 0); if (__pyx_t_4) { /* … */ } }
+16: return i
__pyx_r = __pyx_v_i; goto __pyx_L0;
+17: return max_iters
__pyx_r = __pyx_v_max_iters; goto __pyx_L0;
18:
19: @cython.cdivision(True)
+20: def create_fractal_cython(double xmin, double xmax, double ymin, double ymax, unsigned char[:, :] image, int iters):
/* Python wrapper */ static PyObject *__pyx_pw_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_1create_fractal_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyMethodDef __pyx_mdef_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_1create_fractal_cython = {"create_fractal_cython", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_1create_fractal_cython, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_1create_fractal_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { double __pyx_v_xmin; double __pyx_v_xmax; double __pyx_v_ymin; double __pyx_v_ymax; __Pyx_memviewslice __pyx_v_image = { 0, 0, { 0 }, { 0 }, { 0 } }; int __pyx_v_iters; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("create_fractal_cython (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_xmin,&__pyx_n_s_xmax,&__pyx_n_s_ymin,&__pyx_n_s_ymax,&__pyx_n_s_image,&__pyx_n_s_iters,0}; PyObject* values[6] = {0,0,0,0,0,0}; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); CYTHON_FALLTHROUGH; case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); CYTHON_FALLTHROUGH; case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); CYTHON_FALLTHROUGH; case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); CYTHON_FALLTHROUGH; case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); CYTHON_FALLTHROUGH; case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } kw_args = PyDict_Size(__pyx_kwds); switch (pos_args) { case 0: if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_xmin)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; CYTHON_FALLTHROUGH; case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_xmax)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 1); __PYX_ERR(0, 20, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_ymin)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 2); __PYX_ERR(0, 20, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_ymax)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 3); __PYX_ERR(0, 20, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: if (likely((values[4] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_image)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 4); __PYX_ERR(0, 20, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 5: if (likely((values[5] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_iters)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 5); __PYX_ERR(0, 20, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "create_fractal_cython") < 0)) __PYX_ERR(0, 20, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 6) { goto __pyx_L5_argtuple_error; } else { values[0] = PyTuple_GET_ITEM(__pyx_args, 0); values[1] = PyTuple_GET_ITEM(__pyx_args, 1); values[2] = PyTuple_GET_ITEM(__pyx_args, 2); values[3] = PyTuple_GET_ITEM(__pyx_args, 3); values[4] = PyTuple_GET_ITEM(__pyx_args, 4); values[5] = PyTuple_GET_ITEM(__pyx_args, 5); } __pyx_v_xmin = __pyx_PyFloat_AsDouble(values[0]); if (unlikely((__pyx_v_xmin == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error) __pyx_v_xmax = __pyx_PyFloat_AsDouble(values[1]); if (unlikely((__pyx_v_xmax == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error) __pyx_v_ymin = __pyx_PyFloat_AsDouble(values[2]); if (unlikely((__pyx_v_ymin == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error) __pyx_v_ymax = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_ymax == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error) __pyx_v_image = __Pyx_PyObject_to_MemoryviewSlice_dsds_unsigned_char(values[4], PyBUF_WRITABLE); if (unlikely(!__pyx_v_image.memview)) __PYX_ERR(0, 20, __pyx_L3_error) __pyx_v_iters = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_iters == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error) } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 20, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("_cython_magic_d1e39b81221e13d9e79aa65988c61835.create_fractal_cython", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_create_fractal_cython(__pyx_self, __pyx_v_xmin, __pyx_v_xmax, __pyx_v_ymin, __pyx_v_ymax, __pyx_v_image, __pyx_v_iters); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_create_fractal_cython(CYTHON_UNUSED PyObject *__pyx_self, double __pyx_v_xmin, double __pyx_v_xmax, double __pyx_v_ymin, double __pyx_v_ymax, __Pyx_memviewslice __pyx_v_image, int __pyx_v_iters) { int __pyx_v_x; int __pyx_v_y; int __pyx_v_height; int __pyx_v_width; double __pyx_v_pixel_size_x; double __pyx_v_pixel_size_y; double __pyx_v_real; double __pyx_v_imag; unsigned char __pyx_v_color; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("create_fractal_cython", 0); /* … */ /* function exit code */ __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; __pyx_L1_error:; __Pyx_AddTraceback("_cython_magic_d1e39b81221e13d9e79aa65988c61835.create_fractal_cython", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __PYX_XDEC_MEMVIEW(&__pyx_v_image, 1); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_tuple__19 = PyTuple_Pack(15, __pyx_n_s_xmin, __pyx_n_s_xmax, __pyx_n_s_ymin, __pyx_n_s_ymax, __pyx_n_s_image, __pyx_n_s_iters, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_height, __pyx_n_s_width, __pyx_n_s_pixel_size_x, __pyx_n_s_pixel_size_y, __pyx_n_s_real, __pyx_n_s_imag, __pyx_n_s_color); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); /* … */ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_1create_fractal_cython, NULL, __pyx_n_s_cython_magic_d1e39b81221e13d9e7); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_create_fractal_cython, __pyx_t_1) < 0) __PYX_ERR(0, 20, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(6, 0, 15, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jovyan_cache_ipython_cytho, __pyx_n_s_create_fractal_cython, 20, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 20, __pyx_L1_error)
21:
22: cdef int x, y
23: cdef int height, width
24: cdef double pixel_size_x, pixel_size_y
25: cdef double real, imag
26: cdef unsigned char color
27:
+28: height = image.shape[0]
__pyx_v_height = (__pyx_v_image.shape[0]);
+29: width = image.shape[1]
__pyx_v_width = (__pyx_v_image.shape[1]);
30:
+31: pixel_size_x = (xmax - xmin)/width
__pyx_v_pixel_size_x = ((__pyx_v_xmax - __pyx_v_xmin) / ((double)__pyx_v_width));
+32: pixel_size_y = (ymax - ymin)/height
__pyx_v_pixel_size_y = ((__pyx_v_ymax - __pyx_v_ymin) / ((double)__pyx_v_height));
33:
+34: for x in range(width):
__pyx_t_1 = __pyx_v_width; __pyx_t_2 = __pyx_t_1; for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_x = __pyx_t_3;
+35: real = xmin + x*pixel_size_x
__pyx_v_real = (__pyx_v_xmin + (__pyx_v_x * __pyx_v_pixel_size_x));
+36: for y in range(height):
__pyx_t_4 = __pyx_v_height; __pyx_t_5 = __pyx_t_4; for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_y = __pyx_t_6;
+37: imag = ymin + y*pixel_size_y
__pyx_v_imag = (__pyx_v_ymin + (__pyx_v_y * __pyx_v_pixel_size_y));
+38: color = mandel_cython(real, imag, iters)
__pyx_v_color = __pyx_f_46_cython_magic_d1e39b81221e13d9e79aa65988c61835_mandel_cython(__pyx_v_real, __pyx_v_imag, __pyx_v_iters);
+39: image[y, x] = color
__pyx_t_7 = __pyx_v_y; __pyx_t_8 = __pyx_v_x; __pyx_t_9 = -1; if (__pyx_t_7 < 0) { __pyx_t_7 += __pyx_v_image.shape[0]; if (unlikely(__pyx_t_7 < 0)) __pyx_t_9 = 0; } else if (unlikely(__pyx_t_7 >= __pyx_v_image.shape[0])) __pyx_t_9 = 0; if (__pyx_t_8 < 0) { __pyx_t_8 += __pyx_v_image.shape[1]; if (unlikely(__pyx_t_8 < 0)) __pyx_t_9 = 1; } else if (unlikely(__pyx_t_8 >= __pyx_v_image.shape[1])) __pyx_t_9 = 1; if (unlikely(__pyx_t_9 != -1)) { __Pyx_RaiseBufferIndexError(__pyx_t_9); __PYX_ERR(0, 39, __pyx_L1_error) } *((unsigned char *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_image.data + __pyx_t_7 * __pyx_v_image.strides[0]) ) + __pyx_t_8 * __pyx_v_image.strides[1]) )) = __pyx_v_color; } }
[19]:
gimage = np.zeros((1024, 1536), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
start = time.time()
create_fractal_cython(xmin, xmax, ymin, ymax, gimage, iters)
dt = time.time() - start
print("Mandelbrot created on CPU in %f s" % dt)
plt.grid(False)
plt.imshow(gimage, cmap='jet')
pass
Mandelbrot created on CPU in 0.498192 s
Using Cython
wtih ipyparallel
¶
We need to do some extra work to make sure the shared libary compiled with cython is available to the remote engines:
Compile a
named
shared module with the-n
flagUse
np.ndarray[dtype, ndim]
in place of memroy viewsfor example, double[:] becomes np.ndarray[np.float64_t, ndim=1]
Move the shared library to the
site-packages
directoryCython magic moules can be found in
~/.cache/ipython/cython
Import the modules remtoely in the usual ways
[20]:
%%cython -n cylib2
import cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
def f(int x):
return x*2
[21]:
import os
import glob
import site
import shutil
import sys
if sys.platform == "darwin":
src = glob.glob(os.path.join(os.path.expanduser('~/'), '.ipython', 'cython', 'cylib2*so'))[0]
else:
src = glob.glob(os.path.join(os.path.expanduser('~/'), '.cache', 'ipython', 'cython', 'cylib2*so'))[0]
dst = site.getsitepackages()[0]
shutil.copy(src, dst)
[21]:
'/opt/conda/lib/python3.6/site-packages/cylib2.cpython-36m-x86_64-linux-gnu.so'
[22]:
from ipyparallel import Client
[23]:
rc = Client()
dv = rc[:]
[24]:
with dv.sync_imports():
import cylib2
importing cylib2 on engine(s)
[25]:
dv.map_sync(cylib2.f, np.random.randint(0,10,10))
[25]:
[6, 12, 12, 16, 0, 8, 14, 12, 18, 4]
[ ]: