Parallel Programming Example

In [1]:
%matplotlib inline
In [2]:
import numpy as np
import matplotlib.pyplot as plt

Python version

In [3]:
def mandel(x, y, max_iters):
    c = complex(x, y)
    z = 0.0j
    for i in range(max_iters):
        z = z*z + c
        if abs(z) >= 4:
            return i
    return max_iters

def create_fractal(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    for x in range(width):
        real = xmin + x*pixel_size_x
        for y in range(height):
            imag = ymin + y*pixel_size_y
            color = mandel(real, imag, iters)
            image[y, x]  = color

Note that we use a 16-fold smaller image for the Python version.

In [4]:
%%time

h, w = 2048//4, 3072//4
gimage = np.zeros((h, w), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
create_fractal(xmin, xmax, ymin, ymax, gimage, iters)
plt.imshow(gimage, cmap='jet')
pass
CPU times: user 3.07 s, sys: 0 ns, total: 3.07 s
Wall time: 3.1 s
../_images/notebooks_S14C_Parallel_Programming_Extended_Example_6_1.png

Parallelism with multiprocessing

In [5]:
import multiprocessing
In [6]:
def mandel(x, y, max_iters):
    c = complex(x, y)
    z = 0.0j
    for i in range(max_iters):
        z = z*z + c
        if abs(z) >= 4:
            return i
    return max_iters

def create_fractal(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    args = []
    for x in range(width):
        real = xmin + x*pixel_size_x
        for y in range(height):
            imag = ymin + y*pixel_size_y
            args.append((real, imag, iters))

    p = multiprocessing.Pool()
    colors = np.array(p.starmap(mandel, args), 'float')
    p.close()

    return colors.reshape((width, height)).T
In [7]:
%%time

h, w = 2048//4, 3072//4
gimage = np.zeros((h, w), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
gimage = create_fractal(xmin, xmax, ymin, ymax, gimage, iters)
plt.imshow(gimage, cmap='jet')
pass
CPU times: user 2.3 s, sys: 180 ms, total: 2.48 s
Wall time: 2.53 s
../_images/notebooks_S14C_Parallel_Programming_Extended_Example_10_1.png

Parallelism in numba

In [8]:
from numba import jit, prange

JIT with numba

In [9]:
@jit(nopython=True, cache=True)
def mandel(x, y, max_iters):
    c = complex(x, y)
    z = 0
    for i in range(max_iters):
        z = z*z + c
        if abs(z) >= 4:
            return i
    return max_iters
In [10]:
@jit(nopython=True, cache=True)
def create_fractal(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    for x in range(width):
        real = xmin + x*pixel_size_x
        for y in range(height):
            imag = ymin + y*pixel_size_y
            color = mandel(real, imag, iters)
            image[y, x]  = color
In [11]:
%%time

h, w = 2048, 3072
gimage = np.zeros((h, w), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
create_fractal(xmin, xmax, ymin, ymax, gimage, iters)
plt.imshow(gimage, cmap='jet')
pass
CPU times: user 2.16 s, sys: 28 ms, total: 2.19 s
Wall time: 2.19 s
../_images/notebooks_S14C_Parallel_Programming_Extended_Example_16_1.png

JIT with numba and prange using parallel option

In [12]:
# function to be run in parallel needs `nogil=True`
@jit(nopython=True, nogil=True)
def mandel(x, y, max_iters):
    c = complex(x, y)
    z = 0.0j
    for i in range(max_iters):
        z = z*z + c
        if abs(z) >= 4:
            return i
    return max_iters

@jit(nopython=True, parallel=True)
def create_fractal(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    # replace `range` with `prange`
    for x in prange(width):
        real = xmin + x*pixel_size_x
        for y in prange(height):
            imag = ymin + y*pixel_size_y
            color = mandel(real, imag, iters)
            image[y, x]  = color
In [13]:
%%time

h, w = 2048, 3072
gimage = np.zeros((h, w), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
create_fractal(xmin, xmax, ymin, ymax, gimage, iters)
plt.imshow(gimage, cmap='jet')
pass
CPU times: user 2.44 s, sys: 4 ms, total: 2.44 s
Wall time: 1.06 s
../_images/notebooks_S14C_Parallel_Programming_Extended_Example_19_1.png

Parallelism in cython

In [14]:
import cython
In [15]:
%load_ext cython
In [16]:
%%cython -a

cimport cython

cdef extern from "complex.h":
    double cabs(double complex)

cdef unsigned char mandel_cython(double x, double y, int max_iters):
    cdef double complex c, z

    c = x + y*1j
    z = 0.0j
    for i in range(max_iters):
        z = z*z + c
        if cabs(z) >= 2:
            return i
    return max_iters

@cython.cdivision(True)
def create_fractal_cython(double xmin, double xmax, double ymin, double ymax,
                          unsigned char[:, :] image, int iters):

    cdef int x, y
    cdef int height, width
    cdef double pixel_size_x, pixel_size_y
    cdef double real, imag
    cdef unsigned char color

    height = image.shape[0]
    width = image.shape[1]

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    for x in range(width):
        real = xmin + x*pixel_size_x
        for y in range(height):
            imag = ymin + y*pixel_size_y
            color = mandel_cython(real, imag, iters)
            image[y, x]  = color
Out[16]:
Cython: _cython_magic_a3593081874235a8f98aec77c6842da6.pyx

Generated by Cython 0.27.3

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

 01: 
+02: cimport cython
  __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 2, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 03: 
 04: cdef extern from "complex.h":
 05:     double cabs(double complex)
 06: 
+07: cdef unsigned char mandel_cython(double x, double y, int max_iters):
static unsigned char __pyx_f_46_cython_magic_a3593081874235a8f98aec77c6842da6_mandel_cython(double __pyx_v_x, double __pyx_v_y, int __pyx_v_max_iters) {
  __pyx_t_double_complex __pyx_v_c;
  __pyx_t_double_complex __pyx_v_z;
  int __pyx_v_i;
  unsigned char __pyx_r;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("mandel_cython", 0);
/* … */
  /* function exit code */
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
 08:     cdef double complex c, z
 09: 
+10:     c = x + y*1j
  __pyx_v_c = __Pyx_c_sum_double(__pyx_t_double_complex_from_parts(__pyx_v_x, 0), __Pyx_c_prod_double(__pyx_t_double_complex_from_parts(__pyx_v_y, 0), __pyx_t_double_complex_from_parts(0, 1.0)));
+11:     z = 0.0j
  __pyx_v_z = __pyx_t_double_complex_from_parts(0, 0.0);
+12:     for i in range(max_iters):
  __pyx_t_1 = __pyx_v_max_iters;
  for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
    __pyx_v_i = __pyx_t_2;
+13:         z = z*z + c
    __pyx_v_z = __Pyx_c_sum_double(__Pyx_c_prod_double(__pyx_v_z, __pyx_v_z), __pyx_v_c);
+14:         if cabs(z) >= 2:
    __pyx_t_3 = ((cabs(__pyx_v_z) >= 2.0) != 0);
    if (__pyx_t_3) {
/* … */
    }
  }
+15:             return i
      __pyx_r = __pyx_v_i;
      goto __pyx_L0;
+16:     return max_iters
  __pyx_r = __pyx_v_max_iters;
  goto __pyx_L0;
 17: 
 18: @cython.cdivision(True)
+19: def create_fractal_cython(double xmin, double xmax, double ymin, double ymax,
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_a3593081874235a8f98aec77c6842da6_1create_fractal_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_a3593081874235a8f98aec77c6842da6_1create_fractal_cython = {"create_fractal_cython", (PyCFunction)__pyx_pw_46_cython_magic_a3593081874235a8f98aec77c6842da6_1create_fractal_cython, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_46_cython_magic_a3593081874235a8f98aec77c6842da6_1create_fractal_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  double __pyx_v_xmin;
  double __pyx_v_xmax;
  double __pyx_v_ymin;
  double __pyx_v_ymax;
  __Pyx_memviewslice __pyx_v_image = { 0, 0, { 0 }, { 0 }, { 0 } };
  int __pyx_v_iters;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("create_fractal_cython (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_xmin,&__pyx_n_s_xmax,&__pyx_n_s_ymin,&__pyx_n_s_ymax,&__pyx_n_s_image,&__pyx_n_s_iters,0};
    PyObject* values[6] = {0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_xmin)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_xmax)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 1); __PYX_ERR(0, 19, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_ymin)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 2); __PYX_ERR(0, 19, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_ymax)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 3); __PYX_ERR(0, 19, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_image)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 4); __PYX_ERR(0, 19, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_iters)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, 5); __PYX_ERR(0, 19, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "create_fractal_cython") < 0)) __PYX_ERR(0, 19, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 6) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
    }
    __pyx_v_xmin = __pyx_PyFloat_AsDouble(values[0]); if (unlikely((__pyx_v_xmin == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L3_error)
    __pyx_v_xmax = __pyx_PyFloat_AsDouble(values[1]); if (unlikely((__pyx_v_xmax == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L3_error)
    __pyx_v_ymin = __pyx_PyFloat_AsDouble(values[2]); if (unlikely((__pyx_v_ymin == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L3_error)
    __pyx_v_ymax = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_ymax == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L3_error)
    __pyx_v_image = __Pyx_PyObject_to_MemoryviewSlice_dsds_unsigned_char(values[4]); if (unlikely(!__pyx_v_image.memview)) __PYX_ERR(0, 20, __pyx_L3_error)
    __pyx_v_iters = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_iters == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error)
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("create_fractal_cython", 1, 6, 6, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 19, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_a3593081874235a8f98aec77c6842da6.create_fractal_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_a3593081874235a8f98aec77c6842da6_create_fractal_cython(__pyx_self, __pyx_v_xmin, __pyx_v_xmax, __pyx_v_ymin, __pyx_v_ymax, __pyx_v_image, __pyx_v_iters);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_a3593081874235a8f98aec77c6842da6_create_fractal_cython(CYTHON_UNUSED PyObject *__pyx_self, double __pyx_v_xmin, double __pyx_v_xmax, double __pyx_v_ymin, double __pyx_v_ymax, __Pyx_memviewslice __pyx_v_image, int __pyx_v_iters) {
  int __pyx_v_x;
  int __pyx_v_y;
  int __pyx_v_height;
  int __pyx_v_width;
  double __pyx_v_pixel_size_x;
  double __pyx_v_pixel_size_y;
  double __pyx_v_real;
  double __pyx_v_imag;
  unsigned char __pyx_v_color;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("create_fractal_cython", 0);
/* … */
  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_AddTraceback("_cython_magic_a3593081874235a8f98aec77c6842da6.create_fractal_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_image, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__20 = PyTuple_Pack(15, __pyx_n_s_xmin, __pyx_n_s_xmax, __pyx_n_s_ymin, __pyx_n_s_ymax, __pyx_n_s_image, __pyx_n_s_iters, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_height, __pyx_n_s_width, __pyx_n_s_pixel_size_x, __pyx_n_s_pixel_size_y, __pyx_n_s_real, __pyx_n_s_imag, __pyx_n_s_color); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(0, 19, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__20);
  __Pyx_GIVEREF(__pyx_tuple__20);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_a3593081874235a8f98aec77c6842da6_1create_fractal_cython, NULL, __pyx_n_s_cython_magic_a3593081874235a8f9); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 19, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_create_fractal_cython, __pyx_t_1) < 0) __PYX_ERR(0, 19, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(6, 0, 15, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jovyan_cache_ipython_cytho, __pyx_n_s_create_fractal_cython, 19, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) __PYX_ERR(0, 19, __pyx_L1_error)
 20:                           unsigned char[:, :] image, int iters):
 21: 
 22:     cdef int x, y
 23:     cdef int height, width
 24:     cdef double pixel_size_x, pixel_size_y
 25:     cdef double real, imag
 26:     cdef unsigned char color
 27: 
+28:     height = image.shape[0]
  __pyx_v_height = (__pyx_v_image.shape[0]);
+29:     width = image.shape[1]
  __pyx_v_width = (__pyx_v_image.shape[1]);
 30: 
+31:     pixel_size_x = (xmax - xmin)/width
  __pyx_v_pixel_size_x = ((__pyx_v_xmax - __pyx_v_xmin) / ((double)__pyx_v_width));
+32:     pixel_size_y = (ymax - ymin)/height
  __pyx_v_pixel_size_y = ((__pyx_v_ymax - __pyx_v_ymin) / ((double)__pyx_v_height));
 33: 
+34:     for x in range(width):
  __pyx_t_1 = __pyx_v_width;
  for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) {
    __pyx_v_x = __pyx_t_2;
+35:         real = xmin + x*pixel_size_x
    __pyx_v_real = (__pyx_v_xmin + (__pyx_v_x * __pyx_v_pixel_size_x));
+36:         for y in range(height):
    __pyx_t_3 = __pyx_v_height;
    for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) {
      __pyx_v_y = __pyx_t_4;
+37:             imag = ymin + y*pixel_size_y
      __pyx_v_imag = (__pyx_v_ymin + (__pyx_v_y * __pyx_v_pixel_size_y));
+38:             color = mandel_cython(real, imag, iters)
      __pyx_v_color = __pyx_f_46_cython_magic_a3593081874235a8f98aec77c6842da6_mandel_cython(__pyx_v_real, __pyx_v_imag, __pyx_v_iters);
+39:             image[y, x]  = color
      __pyx_t_5 = __pyx_v_y;
      __pyx_t_6 = __pyx_v_x;
      __pyx_t_7 = -1;
      if (__pyx_t_5 < 0) {
        __pyx_t_5 += __pyx_v_image.shape[0];
        if (unlikely(__pyx_t_5 < 0)) __pyx_t_7 = 0;
      } else if (unlikely(__pyx_t_5 >= __pyx_v_image.shape[0])) __pyx_t_7 = 0;
      if (__pyx_t_6 < 0) {
        __pyx_t_6 += __pyx_v_image.shape[1];
        if (unlikely(__pyx_t_6 < 0)) __pyx_t_7 = 1;
      } else if (unlikely(__pyx_t_6 >= __pyx_v_image.shape[1])) __pyx_t_7 = 1;
      if (unlikely(__pyx_t_7 != -1)) {
        __Pyx_RaiseBufferIndexError(__pyx_t_7);
        __PYX_ERR(0, 39, __pyx_L1_error)
      }
      *((unsigned char *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_image.data + __pyx_t_5 * __pyx_v_image.strides[0]) ) + __pyx_t_6 * __pyx_v_image.strides[1]) )) = __pyx_v_color;
    }
  }
In [17]:
%%time

h, w = 2048, 3072
gimage = np.zeros((h, w), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
create_fractal_cython(xmin, xmax, ymin, ymax, gimage, iters)
plt.imshow(gimage, cmap='jet')
pass
CPU times: user 2.16 s, sys: 16 ms, total: 2.17 s
Wall time: 2.17 s
../_images/notebooks_S14C_Parallel_Programming_Extended_Example_24_1.png

Parallel version with cython

In [18]:
%%cython --compile-args=-fopenmp --link-args=-fopenmp --force

import cython
from cython.parallel import parallel, prange

# All cdef functions need to be `nogil`
cdef extern from "complex.h" nogil:
    double cabs(double complex)

# All cdef functions need to be `nogil`
cdef unsigned char mandel_cython(double x, double y, int max_iters) nogil:
    cdef double complex c, z

    c = x + y*1j
    z = 0.0j
    for i in range(max_iters):
        z = z*z + c
        if cabs(z) >= 2:
            return i
    return max_iters

@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
def create_fractal_cython_par(double xmin, double xmax, double ymin, double ymax,
                              unsigned char[:, :] image, int iters):

    cdef int x, y
    cdef int height, width
    cdef double pixel_size_x, pixel_size_y
    cdef double real, imag
    cdef unsigned char color

    height = image.shape[0]
    width = image.shape[1]

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    # use prange in parallel, nogil context
    with cython.nogil, parallel():
        for x in prange(width):
            real = xmin + x*pixel_size_x
            for y in prange(height):
                imag = ymin + y*pixel_size_y
                color = mandel_cython(real, imag, iters)
                image[y, x]  = color
In [19]:
%%time

h, w = 2048, 3072
gimage = np.zeros((h, w), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
create_fractal_cython_par(xmin, xmax, ymin, ymax, gimage, iters)
plt.imshow(gimage, cmap='jet')
pass
CPU times: user 3.11 s, sys: 12 ms, total: 3.12 s
Wall time: 788 ms
../_images/notebooks_S14C_Parallel_Programming_Extended_Example_27_1.png

Parallelism using ipyparallel

In [3]:
from ipyparallel import Client
In [4]:
rc = Client()
rc.ids
Out[4]:
[0, 1, 2, 3]
In [5]:
dv = rc[:]
In [34]:
@dv.parallel(block=True)
@jit(nopython=True, cache=True)
def mandel(x, y, max_iters):
    c = complex(x, y)
    z = 0
    for i in range(max_iters):
        z = z*z + c
        if z.real * z.real + z.imag * z.imag >= 4: # abs(z) >= 4:
            return i
    return max_iters
In [35]:
# @jit(nopython=True, cache=True)
def create_fractal(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    args = []
    for x in range(width):
        real = xmin + x*pixel_size_x
        for y in range(height):
            imag = ymin + y*pixel_size_y
            args.append((real, imag, iters))

    colors = np.array(mandel(np.array(args).T), 'float')
    return colors.reshape((width, height)).T
In [36]:
%%time

h, w = 2048//4, 3072//4
gimage = np.zeros((h, w), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50
gimage = create_fractal(xmin, xmax, ymin, ymax, gimage, iters)
plt.imshow(gimage, cmap='jet')
pass

TimeoutErrorTraceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/ipyparallel/client/asyncresult.py in _ordered_iter(self)
    688         try:
--> 689             rlist = self.get(0)
    690         except error.TimeoutError:

/opt/conda/lib/python3.6/site-packages/ipyparallel/client/asyncresult.py in get(self, timeout)
    170         else:
--> 171             raise error.TimeoutError("Result not ready.")
    172

TimeoutError: Result not ready.

During handling of the above exception, another exception occurred:

KeyboardInterruptTraceback (most recent call last)
<timed exec> in <module>()

<ipython-input-35-1fc9a5d0ad1f> in create_fractal(xmin, xmax, ymin, ymax, image, iters)
     13             args.append((real, imag, iters))
     14
---> 15     colors = np.array(mandel(np.array(args).T), 'float')
     16     return colors.reshape((width, height)).T

/opt/conda/lib/python3.6/site-packages/ipyparallel/client/asyncresult.py in __iter__(self)
    668     def __iter__(self):
    669         it = self._ordered_iter if self.ordered else self._unordered_iter
--> 670         for r in it():
    671             yield r
    672

/opt/conda/lib/python3.6/site-packages/ipyparallel/client/asyncresult.py in _ordered_iter(self)
    692             evt = Event()
    693             for child in self._children:
--> 694                 self._wait_for_child(child, evt=evt)
    695                 for r in self._yield_child_results(child):
    696                     yield r

/opt/conda/lib/python3.6/site-packages/ipyparallel/client/asyncresult.py in _wait_for_child(child, evt, timeout)
    380         evt.clear()
    381         child.add_done_callback(lambda f: evt.set())
--> 382         evt.wait(timeout)
    383
    384     # asynchronous iterator:

/opt/conda/lib/python3.6/threading.py in wait(self, timeout)
    549             signaled = self._flag
    550             if not signaled:
--> 551                 signaled = self._cond.wait(timeout)
    552             return signaled
    553

/opt/conda/lib/python3.6/threading.py in wait(self, timeout)
    297             else:
    298                 if timeout > 0:
--> 299                     gotit = waiter.acquire(True, timeout)
    300                 else:
    301                     gotit = waiter.acquire(False)

KeyboardInterrupt:
In [30]:
mandel.map?