{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Streaming and functional programming" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# standard libraries\n", "import math\n", "import os\n", "import gzip\n", "from glob import glob\n", "import itertools as it\n", "\n", "# 3rd party libraries\n", "import toolz as tz\n", "import toolz.curried as c\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Understanding `itertools`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `repeat`" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "abc\n", "abc\n", "abc\n", "abc\n", "abc\n" ] } ], "source": [ "for i, item in enumerate(it.repeat('abc')):\n", " print(item)\n", " if i >= 4:\n", " break" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['abc', 'abc', 'abc', 'abc', 'abc']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.repeat('abc', 5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Replicating behavior using regular Python" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def repeat(obj, n=None):\n", " if n is None:\n", " while True:\n", " yield obj\n", " else:\n", " for i in range(n):\n", " yield obj" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "abc\n", "abc\n", "abc\n", "abc\n", "abc\n" ] } ], "source": [ "for i, item in enumerate(repeat('abc')):\n", " print(item)\n", " if i >= 4:\n", " break" ] }, { "cell_type": "code", "execution_count": 9, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "list(repeat('abc', 5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `cycle`" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(it.cycle('abc'), 10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Replicating behavior using regular Python" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def cycle(obj):\n", " while True:\n", " yield from obj" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(cycle('abc'), 10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `count`" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3, 4]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(it.count(), 5))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[2, 3, 4, 5, 6]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(it.count(2), 5))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[2, 4, 6, 8, 10]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(it.count(2,2), 5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Replicating behavior using regular Python" ] }, { "cell_type": "code", 
"execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def count(start=0, step=1):\n", " while True:\n", " yield start\n", " start += step" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3, 4]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(it.count(), 5))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[2, 3, 4, 5, 6]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(count(2), 5))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[2, 4, 6, 8, 10]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(count(2,2), 5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `chain`" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 'a', 'b', 'c', 0, 1, 2, 3]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(it.chain(range(3), 'abc', it.count()), 10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Replicating behavior with regular Python" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def chain(*args):\n", " for arg in args:\n", " yield from arg" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 'a', 'b', 'c', 0, 1, 2, 3]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(chain(range(3), 'abc', it.count()), 10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `chain.from_iterable`" ] }, { "cell_type": "code", 
"execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 'a', 'b', 'c', 0, 1, 2, 3]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(it.chain.from_iterable([range(3), 'abc', it.count()]),10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Replicating behavior with regular Python" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def chain_from_iterable(args):\n", " for arg in args:\n", " yield from arg" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 'a', 'b', 'c', 0, 1, 2, 3]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.islice(chain_from_iterable([range(3), 'abc', it.count()]),10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `product`" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('a', 0, 'A'),\n", " ('a', 0, 'B'),\n", " ('a', 1, 'A'),\n", " ('a', 1, 'B'),\n", " ('b', 0, 'A'),\n", " ('b', 0, 'B'),\n", " ('b', 1, 'A'),\n", " ('b', 1, 'B')]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.product('ab', range(2), 'AB'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Replicating behavior with regular Python" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Note**: Just as with lists, addition for tuples is concatenation." 
] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('a', 0, 'A')" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "('a', 0) + ('A',)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def product(*args):\n", "    \"\"\"Yield the Cartesian product of the given iterables as tuples.\n", "\n", "    Mirrors `itertools.product`: the rightmost iterable varies fastest,\n", "    and calling it with no arguments yields a single empty tuple.\n", "    \"\"\"\n", "    if not args:\n", "        yield ()\n", "    else:\n", "        # materialize once: this pool is scanned again for every prefix,\n", "        # which would come up empty on a one-shot iterator\n", "        last = tuple(args[-1])\n", "        for items in product(*args[:-1]):\n", "            for item in last:\n", "                yield items + (item,)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('a', 0, 'A'),\n", " ('a', 0, 'B'),\n", " ('a', 1, 'A'),\n", " ('a', 1, 'B'),\n", " ('b', 0, 'A'),\n", " ('b', 0, 'B'),\n", " ('b', 1, 'A'),\n", " ('b', 1, 'B')]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(product('ab', range(2), 'AB'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `permutations`" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.permutations('abc', 2))" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('a', 'b', 'c'),\n", " ('a', 'c', 'b'),\n", " ('b', 'a', 'c'),\n", " ('b', 'c', 'a'),\n", " ('c', 'a', 'b'),\n", " ('c', 'b', 'a')]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(it.permutations('abc'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Replicating behavior with regular Python" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def permutations(xs, n=None):\n", "    \"\"\"Yield the length-`n` orderings of the items of `xs`, like `itertools.permutations`.\n", "\n", "    Items are told apart by position, not by value, so repeated or\n", "    unhashable items are handled; `n` defaults to the number of items.\n", "    \"\"\"\n", "    pool = tuple(xs)\n", "    if n is None:\n", "        n = len(pool)\n", "    args = repeat(range(len(pool)), n)\n", "    for indices in product(*args):\n", "        # keep only index tuples that use each position at most once\n", "        if len(set(indices)) == n:\n", "            yield tuple(pool[i] for i in indices)" ] }, 
{ "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(permutations('abc', 2))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('a', 'b', 'c'),\n", " ('a', 'c', 'b'),\n", " ('b', 'a', 'c'),\n", " ('b', 'c', 'a'),\n", " ('c', 'a', 'b'),\n", " ('c', 'b', 'a')]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(permutations('abc'))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }