{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MongoDB\n", "\n", "MongoDB is a document database. It stores JSON objects.\n", "\n", "- [Documentation](https://docs.mongodb.com)\n", "- [Query selectors](https://docs.mongodb.com/manual/reference/operator/query/#query-selectors)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from pymongo import MongoClient, GEOSPHERE\n", "from bson.objectid import ObjectId\n", "from bson.son import SON" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import requests\n", "from bson import json_util" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import collections\n", "from pathlib import Path" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from pprint import pprint" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set up" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This connects to the MongoDB daemon" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "client = MongoClient()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This specifies the database. It does not matter if it does not exist." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "client.drop_database('starwars')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "db = client.starwars" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This specifies a `collection`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "people = db.people" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check what collections are in the database. Note that the `people` collection is only created when the first value is inserted." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db.list_collection_names()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get Data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "base_url = 'https://swapi.co/api'" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "resp = requests.get('https://swapi.co/api/people/1')\n", "data = resp.json()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'birth_year': '19BBY',\n", " 'created': '2014-12-09T13:50:51.644000Z',\n", " 'edited': '2014-12-20T21:17:56.891000Z',\n", " 'eye_color': 'blue',\n", " 'films': ['https://swapi.co/api/films/2/',\n", " 'https://swapi.co/api/films/6/',\n", " 'https://swapi.co/api/films/3/',\n", " 'https://swapi.co/api/films/1/',\n", " 'https://swapi.co/api/films/7/'],\n", " 'gender': 'male',\n", " 'hair_color': 'blond',\n", " 'height': '172',\n", " 'homeworld': 'https://swapi.co/api/planets/1/',\n", " 'mass': '77',\n", " 'name': 'Luke Skywalker',\n", " 'skin_color': 'fair',\n", " 'species': ['https://swapi.co/api/species/1/'],\n", " 'starships': ['https://swapi.co/api/starships/12/',\n", " 'https://swapi.co/api/starships/22/'],\n", " 'url': 'https://swapi.co/api/people/1/',\n", " 'vehicles': ['https://swapi.co/api/vehicles/14/',\n", " 'https://swapi.co/api/vehicles/30/']}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def convert(data):\n", " \"\"\"Nest inner API calls one level dowwn.\"\"\"\n", " for key, val in data.items():\n", " if isinstance(val, list):\n", " data[key] = [requests.get(value).json() for value in val \n", " if isinstance(value, str) and value.startswith(base_url)]\n", " else:\n", " if isinstance(val, str) and val.startswith(base_url):\n", " data[key] = requests.get(val).json() \n", " return data" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "luke = convert(data)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def convert_str(s):\n", " if '.' in s:\n", " try:\n", " s = float(s)\n", " except:\n", " pass\n", " else:\n", " try:\n", " s = int(s)\n", " except:\n", " pass\n", " return s\n", "\n", "def to_numeric(data):\n", " for key in data:\n", " val = data[key]\n", " if isinstance(val, str):\n", " data[key] = convert_str(val)\n", " elif isinstance(val, dict):\n", " for k, v in val.items():\n", " if isinstance(v, str):\n", " val[k] = convert_str(v)\n", " elif isinstance(val, list):\n", " for i, item in enumerate(val):\n", " if isinstance(item, str):\n", " data[key][i] = convert_str(item)\n", " elif isinstance(item, dict):\n", " for k, v in item.items():\n", " if isinstance(v, str):\n", " data[key][i][k] = convert_str(v) \n", " return data" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "luke = to_numeric(luke)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'birth_year': '19BBY',\n", " 'created': '2014-12-09T13:50:51.644000Z',\n", " 'edited': '2014-12-20T21:17:56.891000Z',\n", " 'eye_color': 'blue',\n", " 'films': [{'characters': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/2/',\n", " 'https://swapi.co/api/people/3/',\n", " 'https://swapi.co/api/people/4/',\n", " 'https://swapi.co/api/people/5/',\n", " 'https://swapi.co/api/people/10/',\n", " 'https://swapi.co/api/people/13/',\n", " 'https://swapi.co/api/people/14/',\n", " 'https://swapi.co/api/people/18/',\n", " 'https://swapi.co/api/people/20/',\n", " 'https://swapi.co/api/people/21/',\n", " 'https://swapi.co/api/people/22/',\n", " 'https://swapi.co/api/people/23/',\n", " 'https://swapi.co/api/people/24/',\n", " 'https://swapi.co/api/people/25/',\n", " 'https://swapi.co/api/people/26/'],\n", " 'created': '2014-12-12T11:26:24.656000Z',\n", " 'director': 'Irvin Kershner',\n", " 'edited': '2017-04-19T10:57:29.544256Z',\n", " 'episode_id': 5,\n", " 'opening_crawl': 'It is a dark time for the\\r\\n'\n", " 'Rebellion. Although the Death\\r\\n'\n", " 'Star has been destroyed,\\r\\n'\n", " 'Imperial troops have driven the\\r\\n'\n", " 'Rebel forces from their hidden\\r\\n'\n", " 'base and pursued them across\\r\\n'\n", " 'the galaxy.\\r\\n'\n", " '\\r\\n'\n", " 'Evading the dreaded Imperial\\r\\n'\n", " 'Starfleet, a group of freedom\\r\\n'\n", " 'fighters led by Luke Skywalker\\r\\n'\n", " 'has established a new secret\\r\\n'\n", " 'base on the remote ice world\\r\\n'\n", " 'of Hoth.\\r\\n'\n", " '\\r\\n'\n", " 'The evil lord Darth Vader,\\r\\n'\n", " 'obsessed with finding young\\r\\n'\n", " 'Skywalker, has dispatched\\r\\n'\n", " 'thousands of remote probes into\\r\\n'\n", " 'the far reaches of space....',\n", " 'planets': ['https://swapi.co/api/planets/4/',\n", " 'https://swapi.co/api/planets/5/',\n", " 'https://swapi.co/api/planets/6/',\n", " 'https://swapi.co/api/planets/27/'],\n", " 'producer': 'Gary Kurtz, Rick McCallum',\n", " 'release_date': '1980-05-17',\n", " 'species': ['https://swapi.co/api/species/6/',\n", " 'https://swapi.co/api/species/7/',\n", " 'https://swapi.co/api/species/3/',\n", " 'https://swapi.co/api/species/2/',\n", " 'https://swapi.co/api/species/1/'],\n", " 'starships': ['https://swapi.co/api/starships/15/',\n", " 'https://swapi.co/api/starships/10/',\n", " 'https://swapi.co/api/starships/11/',\n", " 'https://swapi.co/api/starships/12/',\n", " 'https://swapi.co/api/starships/21/',\n", " 'https://swapi.co/api/starships/22/',\n", " 'https://swapi.co/api/starships/23/',\n", " 'https://swapi.co/api/starships/3/',\n", " 'https://swapi.co/api/starships/17/'],\n", " 'title': 'The Empire Strikes Back',\n", " 'url': 'https://swapi.co/api/films/2/',\n", " 'vehicles': ['https://swapi.co/api/vehicles/8/',\n", " 'https://swapi.co/api/vehicles/14/',\n", " 'https://swapi.co/api/vehicles/16/',\n", " 'https://swapi.co/api/vehicles/18/',\n", " 'https://swapi.co/api/vehicles/19/',\n", " 'https://swapi.co/api/vehicles/20/']},\n", " {'characters': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/2/',\n", " 'https://swapi.co/api/people/3/',\n", " 'https://swapi.co/api/people/4/',\n", " 'https://swapi.co/api/people/5/',\n", " 'https://swapi.co/api/people/6/',\n", " 'https://swapi.co/api/people/7/',\n", " 'https://swapi.co/api/people/10/',\n", " 'https://swapi.co/api/people/11/',\n", " 'https://swapi.co/api/people/12/',\n", " 'https://swapi.co/api/people/13/',\n", " 'https://swapi.co/api/people/20/',\n", " 'https://swapi.co/api/people/21/',\n", " 'https://swapi.co/api/people/33/',\n", " 'https://swapi.co/api/people/46/',\n", " 'https://swapi.co/api/people/51/',\n", " 'https://swapi.co/api/people/52/',\n", " 'https://swapi.co/api/people/53/',\n", " 'https://swapi.co/api/people/54/',\n", " 'https://swapi.co/api/people/55/',\n", " 'https://swapi.co/api/people/56/',\n", " 'https://swapi.co/api/people/58/',\n", " 'https://swapi.co/api/people/63/',\n", " 'https://swapi.co/api/people/64/',\n", " 'https://swapi.co/api/people/67/',\n", " 'https://swapi.co/api/people/68/',\n", " 'https://swapi.co/api/people/75/',\n", " 'https://swapi.co/api/people/78/',\n", " 'https://swapi.co/api/people/79/',\n", " 'https://swapi.co/api/people/80/',\n", " 'https://swapi.co/api/people/81/',\n", " 'https://swapi.co/api/people/82/',\n", " 'https://swapi.co/api/people/83/',\n", " 'https://swapi.co/api/people/35/'],\n", " 'created': '2014-12-20T18:49:38.403000Z',\n", " 'director': 'George Lucas',\n", " 'edited': '2015-04-11T09:45:44.862122Z',\n", " 'episode_id': 3,\n", " 'opening_crawl': 'War! The Republic is crumbling\\r\\n'\n", " 'under attacks by the ruthless\\r\\n'\n", " 'Sith Lord, Count Dooku.\\r\\n'\n", " 'There are heroes on both sides.\\r\\n'\n", " 'Evil is everywhere.\\r\\n'\n", " '\\r\\n'\n", " 'In a stunning move, the\\r\\n'\n", " 'fiendish droid leader, General\\r\\n'\n", " 'Grievous, has swept into the\\r\\n'\n", " 'Republic capital and kidnapped\\r\\n'\n", " 'Chancellor Palpatine, leader of\\r\\n'\n", " 'the Galactic Senate.\\r\\n'\n", " '\\r\\n'\n", " 'As the Separatist Droid Army\\r\\n'\n", " 'attempts to flee the besieged\\r\\n'\n", " 'capital with their valuable\\r\\n'\n", " 'hostage, two Jedi Knights lead a\\r\\n'\n", " 'desperate mission to rescue the\\r\\n'\n", " 'captive Chancellor....',\n", " 'planets': ['https://swapi.co/api/planets/2/',\n", " 'https://swapi.co/api/planets/5/',\n", " 'https://swapi.co/api/planets/8/',\n", " 'https://swapi.co/api/planets/9/',\n", " 'https://swapi.co/api/planets/12/',\n", " 'https://swapi.co/api/planets/13/',\n", " 'https://swapi.co/api/planets/14/',\n", " 'https://swapi.co/api/planets/15/',\n", " 'https://swapi.co/api/planets/16/',\n", " 'https://swapi.co/api/planets/17/',\n", " 'https://swapi.co/api/planets/18/',\n", " 'https://swapi.co/api/planets/19/',\n", " 'https://swapi.co/api/planets/1/'],\n", " 'producer': 'Rick McCallum',\n", " 'release_date': '2005-05-19',\n", " 'species': ['https://swapi.co/api/species/19/',\n", " 'https://swapi.co/api/species/33/',\n", " 'https://swapi.co/api/species/2/',\n", " 'https://swapi.co/api/species/3/',\n", " 'https://swapi.co/api/species/36/',\n", " 'https://swapi.co/api/species/37/',\n", " 'https://swapi.co/api/species/6/',\n", " 'https://swapi.co/api/species/1/',\n", " 'https://swapi.co/api/species/34/',\n", " 'https://swapi.co/api/species/15/',\n", " 'https://swapi.co/api/species/35/',\n", " 'https://swapi.co/api/species/20/',\n", " 'https://swapi.co/api/species/23/',\n", " 'https://swapi.co/api/species/24/',\n", " 'https://swapi.co/api/species/25/',\n", " 'https://swapi.co/api/species/26/',\n", " 'https://swapi.co/api/species/27/',\n", " 'https://swapi.co/api/species/28/',\n", " 'https://swapi.co/api/species/29/',\n", " 'https://swapi.co/api/species/30/'],\n", " 'starships': ['https://swapi.co/api/starships/48/',\n", " 'https://swapi.co/api/starships/59/',\n", " 'https://swapi.co/api/starships/61/',\n", " 'https://swapi.co/api/starships/32/',\n", " 'https://swapi.co/api/starships/63/',\n", " 'https://swapi.co/api/starships/64/',\n", " 'https://swapi.co/api/starships/65/',\n", " 'https://swapi.co/api/starships/66/',\n", " 'https://swapi.co/api/starships/74/',\n", " 'https://swapi.co/api/starships/75/',\n", " 'https://swapi.co/api/starships/2/',\n", " 'https://swapi.co/api/starships/68/'],\n", " 'title': 'Revenge of the Sith',\n", " 'url': 'https://swapi.co/api/films/6/',\n", " 'vehicles': ['https://swapi.co/api/vehicles/33/',\n", " 'https://swapi.co/api/vehicles/50/',\n", " 'https://swapi.co/api/vehicles/53/',\n", " 'https://swapi.co/api/vehicles/56/',\n", " 'https://swapi.co/api/vehicles/60/',\n", " 'https://swapi.co/api/vehicles/62/',\n", " 'https://swapi.co/api/vehicles/67/',\n", " 'https://swapi.co/api/vehicles/69/',\n", " 'https://swapi.co/api/vehicles/70/',\n", " 'https://swapi.co/api/vehicles/71/',\n", " 'https://swapi.co/api/vehicles/72/',\n", " 'https://swapi.co/api/vehicles/73/',\n", " 'https://swapi.co/api/vehicles/76/']},\n", " {'characters': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/2/',\n", " 'https://swapi.co/api/people/3/',\n", " 'https://swapi.co/api/people/4/',\n", " 'https://swapi.co/api/people/5/',\n", " 'https://swapi.co/api/people/10/',\n", " 'https://swapi.co/api/people/13/',\n", " 'https://swapi.co/api/people/14/',\n", " 'https://swapi.co/api/people/16/',\n", " 'https://swapi.co/api/people/18/',\n", " 'https://swapi.co/api/people/20/',\n", " 'https://swapi.co/api/people/21/',\n", " 'https://swapi.co/api/people/22/',\n", " 'https://swapi.co/api/people/25/',\n", " 'https://swapi.co/api/people/27/',\n", " 'https://swapi.co/api/people/28/',\n", " 'https://swapi.co/api/people/29/',\n", " 'https://swapi.co/api/people/30/',\n", " 'https://swapi.co/api/people/31/',\n", " 'https://swapi.co/api/people/45/'],\n", " 'created': '2014-12-18T10:39:33.255000Z',\n", " 'director': 'Richard Marquand',\n", " 'edited': '2015-04-11T09:46:05.220365Z',\n", " 'episode_id': 6,\n", " 'opening_crawl': 'Luke Skywalker has returned to\\r\\n'\n", " 'his home planet of Tatooine in\\r\\n'\n", " 'an attempt to rescue his\\r\\n'\n", " 'friend Han Solo from the\\r\\n'\n", " 'clutches of the vile gangster\\r\\n'\n", " 'Jabba the Hutt.\\r\\n'\n", " '\\r\\n'\n", " 'Little does Luke know that the\\r\\n'\n", " 'GALACTIC EMPIRE has secretly\\r\\n'\n", " 'begun construction on a new\\r\\n'\n", " 'armored space station even\\r\\n'\n", " 'more powerful than the first\\r\\n'\n", " 'dreaded Death Star.\\r\\n'\n", " '\\r\\n'\n", " 'When completed, this ultimate\\r\\n'\n", " 'weapon will spell certain doom\\r\\n'\n", " 'for the small band of rebels\\r\\n'\n", " 'struggling to restore freedom\\r\\n'\n", " 'to the galaxy...',\n", " 'planets': ['https://swapi.co/api/planets/5/',\n", " 'https://swapi.co/api/planets/7/',\n", " 'https://swapi.co/api/planets/8/',\n", " 'https://swapi.co/api/planets/9/',\n", " 'https://swapi.co/api/planets/1/'],\n", " 'producer': 'Howard G. Kazanjian, George Lucas, Rick McCallum',\n", " 'release_date': '1983-05-25',\n", " 'species': ['https://swapi.co/api/species/1/',\n", " 'https://swapi.co/api/species/2/',\n", " 'https://swapi.co/api/species/3/',\n", " 'https://swapi.co/api/species/5/',\n", " 'https://swapi.co/api/species/6/',\n", " 'https://swapi.co/api/species/8/',\n", " 'https://swapi.co/api/species/9/',\n", " 'https://swapi.co/api/species/10/',\n", " 'https://swapi.co/api/species/15/'],\n", " 'starships': ['https://swapi.co/api/starships/15/',\n", " 'https://swapi.co/api/starships/10/',\n", " 'https://swapi.co/api/starships/11/',\n", " 'https://swapi.co/api/starships/12/',\n", " 'https://swapi.co/api/starships/22/',\n", " 'https://swapi.co/api/starships/23/',\n", " 'https://swapi.co/api/starships/27/',\n", " 'https://swapi.co/api/starships/28/',\n", " 'https://swapi.co/api/starships/29/',\n", " 'https://swapi.co/api/starships/3/',\n", " 'https://swapi.co/api/starships/17/',\n", " 'https://swapi.co/api/starships/2/'],\n", " 'title': 'Return of the Jedi',\n", " 'url': 'https://swapi.co/api/films/3/',\n", " 'vehicles': ['https://swapi.co/api/vehicles/8/',\n", " 'https://swapi.co/api/vehicles/16/',\n", " 'https://swapi.co/api/vehicles/18/',\n", " 'https://swapi.co/api/vehicles/19/',\n", " 'https://swapi.co/api/vehicles/24/',\n", " 'https://swapi.co/api/vehicles/25/',\n", " 'https://swapi.co/api/vehicles/26/',\n", " 'https://swapi.co/api/vehicles/30/']},\n", " {'characters': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/2/',\n", " 'https://swapi.co/api/people/3/',\n", " 'https://swapi.co/api/people/4/',\n", " 'https://swapi.co/api/people/5/',\n", " 'https://swapi.co/api/people/6/',\n", " 'https://swapi.co/api/people/7/',\n", " 'https://swapi.co/api/people/8/',\n", " 'https://swapi.co/api/people/9/',\n", " 'https://swapi.co/api/people/10/',\n", " 'https://swapi.co/api/people/12/',\n", " 'https://swapi.co/api/people/13/',\n", " 'https://swapi.co/api/people/14/',\n", " 'https://swapi.co/api/people/15/',\n", " 'https://swapi.co/api/people/16/',\n", " 'https://swapi.co/api/people/18/',\n", " 'https://swapi.co/api/people/19/',\n", " 'https://swapi.co/api/people/81/'],\n", " 'created': '2014-12-10T14:23:31.880000Z',\n", " 'director': 'George Lucas',\n", " 'edited': '2015-04-11T09:46:52.774897Z',\n", " 'episode_id': 4,\n", " 'opening_crawl': 'It is a period of civil war.\\r\\n'\n", " 'Rebel spaceships, striking\\r\\n'\n", " 'from a hidden base, have won\\r\\n'\n", " 'their first victory against\\r\\n'\n", " 'the evil Galactic Empire.\\r\\n'\n", " '\\r\\n'\n", " 'During the battle, Rebel\\r\\n'\n", " 'spies managed to steal secret\\r\\n'\n", " \"plans to the Empire's\\r\\n\"\n", " 'ultimate weapon, the DEATH\\r\\n'\n", " 'STAR, an armored space\\r\\n'\n", " 'station with enough power\\r\\n'\n", " 'to destroy an entire planet.\\r\\n'\n", " '\\r\\n'\n", " \"Pursued by the Empire's\\r\\n\"\n", " 'sinister agents, Princess\\r\\n'\n", " 'Leia races home aboard her\\r\\n'\n", " 'starship, custodian of the\\r\\n'\n", " 'stolen plans that can save her\\r\\n'\n", " 'people and restore\\r\\n'\n", " 'freedom to the galaxy....',\n", " 'planets': ['https://swapi.co/api/planets/2/',\n", " 'https://swapi.co/api/planets/3/',\n", " 'https://swapi.co/api/planets/1/'],\n", " 'producer': 'Gary Kurtz, Rick McCallum',\n", " 'release_date': '1977-05-25',\n", " 'species': ['https://swapi.co/api/species/5/',\n", " 'https://swapi.co/api/species/3/',\n", " 'https://swapi.co/api/species/2/',\n", " 'https://swapi.co/api/species/1/',\n", " 'https://swapi.co/api/species/4/'],\n", " 'starships': ['https://swapi.co/api/starships/2/',\n", " 'https://swapi.co/api/starships/3/',\n", " 'https://swapi.co/api/starships/5/',\n", " 'https://swapi.co/api/starships/9/',\n", " 'https://swapi.co/api/starships/10/',\n", " 'https://swapi.co/api/starships/11/',\n", " 'https://swapi.co/api/starships/12/',\n", " 'https://swapi.co/api/starships/13/'],\n", " 'title': 'A New Hope',\n", " 'url': 'https://swapi.co/api/films/1/',\n", " 'vehicles': ['https://swapi.co/api/vehicles/4/',\n", " 'https://swapi.co/api/vehicles/6/',\n", " 'https://swapi.co/api/vehicles/7/',\n", " 'https://swapi.co/api/vehicles/8/']},\n", " {'characters': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/3/',\n", " 'https://swapi.co/api/people/5/',\n", " 'https://swapi.co/api/people/13/',\n", " 'https://swapi.co/api/people/14/',\n", " 'https://swapi.co/api/people/27/',\n", " 'https://swapi.co/api/people/84/',\n", " 'https://swapi.co/api/people/85/',\n", " 'https://swapi.co/api/people/86/',\n", " 'https://swapi.co/api/people/87/',\n", " 'https://swapi.co/api/people/88/'],\n", " 'created': '2015-04-17T06:51:30.504780Z',\n", " 'director': 'J. J. Abrams',\n", " 'edited': '2015-12-17T14:31:47.617768Z',\n", " 'episode_id': 7,\n", " 'opening_crawl': 'Luke Skywalker has vanished.\\r\\n'\n", " 'In his absence, the sinister\\r\\n'\n", " 'FIRST ORDER has risen from\\r\\n'\n", " 'the ashes of the Empire\\r\\n'\n", " 'and will not rest until\\r\\n'\n", " 'Skywalker, the last Jedi,\\r\\n'\n", " 'has been destroyed.\\r\\n'\n", " ' \\r\\n'\n", " 'With the support of the\\r\\n'\n", " 'REPUBLIC, General Leia Organa\\r\\n'\n", " 'leads a brave RESISTANCE.\\r\\n'\n", " 'She is desperate to find her\\r\\n'\n", " 'brother Luke and gain his\\r\\n'\n", " 'help in restoring peace and\\r\\n'\n", " 'justice to the galaxy.\\r\\n'\n", " ' \\r\\n'\n", " 'Leia has sent her most daring\\r\\n'\n", " 'pilot on a secret mission\\r\\n'\n", " 'to Jakku, where an old ally\\r\\n'\n", " 'has discovered a clue to\\r\\n'\n", " \"Luke's whereabouts....\",\n", " 'planets': ['https://swapi.co/api/planets/61/'],\n", " 'producer': 'Kathleen Kennedy, J. J. Abrams, Bryan Burk',\n", " 'release_date': '2015-12-11',\n", " 'species': ['https://swapi.co/api/species/3/',\n", " 'https://swapi.co/api/species/2/',\n", " 'https://swapi.co/api/species/1/'],\n", " 'starships': ['https://swapi.co/api/starships/77/',\n", " 'https://swapi.co/api/starships/10/'],\n", " 'title': 'The Force Awakens',\n", " 'url': 'https://swapi.co/api/films/7/',\n", " 'vehicles': []}],\n", " 'gender': 'male',\n", " 'hair_color': 'blond',\n", " 'height': 172,\n", " 'homeworld': {'climate': 'arid',\n", " 'created': '2014-12-09T13:50:49.641000Z',\n", " 'diameter': 10465,\n", " 'edited': '2014-12-21T20:48:04.175778Z',\n", " 'films': ['https://swapi.co/api/films/5/',\n", " 'https://swapi.co/api/films/4/',\n", " 'https://swapi.co/api/films/6/',\n", " 'https://swapi.co/api/films/3/',\n", " 'https://swapi.co/api/films/1/'],\n", " 'gravity': '1 standard',\n", " 'name': 'Tatooine',\n", " 'orbital_period': 304,\n", " 'population': 200000,\n", " 'residents': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/2/',\n", " 'https://swapi.co/api/people/4/',\n", " 'https://swapi.co/api/people/6/',\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 'https://swapi.co/api/people/7/',\n", " 'https://swapi.co/api/people/8/',\n", " 'https://swapi.co/api/people/9/',\n", " 'https://swapi.co/api/people/11/',\n", " 'https://swapi.co/api/people/43/',\n", " 'https://swapi.co/api/people/62/'],\n", " 'rotation_period': 23,\n", " 'surface_water': 1,\n", " 'terrain': 'desert',\n", " 'url': 'https://swapi.co/api/planets/1/'},\n", " 'mass': 77,\n", " 'name': 'Luke Skywalker',\n", " 'skin_color': 'fair',\n", " 'species': [{'average_height': 180,\n", " 'average_lifespan': 120,\n", " 'classification': 'mammal',\n", " 'created': '2014-12-10T13:52:11.567000Z',\n", " 'designation': 'sentient',\n", " 'edited': '2015-04-17T06:59:55.850671Z',\n", " 'eye_colors': 'brown, blue, green, hazel, grey, amber',\n", " 'films': ['https://swapi.co/api/films/2/',\n", " 'https://swapi.co/api/films/7/',\n", " 'https://swapi.co/api/films/5/',\n", " 'https://swapi.co/api/films/4/',\n", " 'https://swapi.co/api/films/6/',\n", " 'https://swapi.co/api/films/3/',\n", " 'https://swapi.co/api/films/1/'],\n", " 'hair_colors': 'blonde, brown, black, red',\n", " 'homeworld': 'https://swapi.co/api/planets/9/',\n", " 'language': 'Galactic Basic',\n", " 'name': 'Human',\n", " 'people': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/4/',\n", " 'https://swapi.co/api/people/5/',\n", " 'https://swapi.co/api/people/6/',\n", " 'https://swapi.co/api/people/7/',\n", " 'https://swapi.co/api/people/9/',\n", " 'https://swapi.co/api/people/10/',\n", " 'https://swapi.co/api/people/11/',\n", " 'https://swapi.co/api/people/12/',\n", " 'https://swapi.co/api/people/14/',\n", " 'https://swapi.co/api/people/18/',\n", " 'https://swapi.co/api/people/19/',\n", " 'https://swapi.co/api/people/21/',\n", " 'https://swapi.co/api/people/22/',\n", " 'https://swapi.co/api/people/25/',\n", " 'https://swapi.co/api/people/26/',\n", " 'https://swapi.co/api/people/28/',\n", " 'https://swapi.co/api/people/29/',\n", " 'https://swapi.co/api/people/32/',\n", " 'https://swapi.co/api/people/34/',\n", " 'https://swapi.co/api/people/43/',\n", " 'https://swapi.co/api/people/51/',\n", " 'https://swapi.co/api/people/60/',\n", " 'https://swapi.co/api/people/61/',\n", " 'https://swapi.co/api/people/62/',\n", " 'https://swapi.co/api/people/66/',\n", " 'https://swapi.co/api/people/67/',\n", " 'https://swapi.co/api/people/68/',\n", " 'https://swapi.co/api/people/69/',\n", " 'https://swapi.co/api/people/74/',\n", " 'https://swapi.co/api/people/81/',\n", " 'https://swapi.co/api/people/84/',\n", " 'https://swapi.co/api/people/85/',\n", " 'https://swapi.co/api/people/86/',\n", " 'https://swapi.co/api/people/35/'],\n", " 'skin_colors': 'caucasian, black, asian, hispanic',\n", " 'url': 'https://swapi.co/api/species/1/'}],\n", " 'starships': [{'MGLT': 100,\n", " 'cargo_capacity': 110,\n", " 'consumables': '1 week',\n", " 'cost_in_credits': 149999,\n", " 'created': '2014-12-12T11:19:05.340000Z',\n", " 'crew': 1,\n", " 'edited': '2014-12-22T17:35:44.491233Z',\n", " 'films': ['https://swapi.co/api/films/2/',\n", " 'https://swapi.co/api/films/3/',\n", " 'https://swapi.co/api/films/1/'],\n", " 'hyperdrive_rating': 1.0,\n", " 'length': 12.5,\n", " 'manufacturer': 'Incom Corporation',\n", " 'max_atmosphering_speed': 1050,\n", " 'model': 'T-65 X-wing',\n", " 'name': 'X-wing',\n", " 'passengers': 0,\n", " 'pilots': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/9/',\n", " 'https://swapi.co/api/people/18/',\n", " 'https://swapi.co/api/people/19/'],\n", " 'starship_class': 'Starfighter',\n", " 'url': 'https://swapi.co/api/starships/12/'},\n", " {'MGLT': 50,\n", " 'cargo_capacity': 80000,\n", " 'consumables': '2 months',\n", " 'cost_in_credits': 240000,\n", " 'created': '2014-12-15T13:04:47.235000Z',\n", " 'crew': 6,\n", " 'edited': '2014-12-22T17:35:44.795405Z',\n", " 'films': ['https://swapi.co/api/films/2/',\n", " 'https://swapi.co/api/films/3/'],\n", " 'hyperdrive_rating': 1.0,\n", " 'length': 20,\n", " 'manufacturer': 'Sienar Fleet Systems',\n", " 'max_atmosphering_speed': 850,\n", " 'model': 'Lambda-class T-4a shuttle',\n", " 'name': 'Imperial shuttle',\n", " 'passengers': 20,\n", " 'pilots': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/13/',\n", " 'https://swapi.co/api/people/14/'],\n", " 'starship_class': 'Armed government transport',\n", " 'url': 'https://swapi.co/api/starships/22/'}],\n", " 'url': {'birth_year': '19BBY',\n", " 'created': '2014-12-09T13:50:51.644000Z',\n", " 'edited': '2014-12-20T21:17:56.891000Z',\n", " 'eye_color': 'blue',\n", " 'films': ['https://swapi.co/api/films/2/',\n", " 'https://swapi.co/api/films/6/',\n", " 'https://swapi.co/api/films/3/',\n", " 'https://swapi.co/api/films/1/',\n", " 'https://swapi.co/api/films/7/'],\n", " 'gender': 'male',\n", " 'hair_color': 'blond',\n", " 'height': 172,\n", " 'homeworld': 'https://swapi.co/api/planets/1/',\n", " 'mass': 77,\n", " 'name': 'Luke Skywalker',\n", " 'skin_color': 'fair',\n", " 'species': ['https://swapi.co/api/species/1/'],\n", " 'starships': ['https://swapi.co/api/starships/12/',\n", " 'https://swapi.co/api/starships/22/'],\n", " 'url': 'https://swapi.co/api/people/1/',\n", " 'vehicles': ['https://swapi.co/api/vehicles/14/',\n", " 'https://swapi.co/api/vehicles/30/']},\n", " 'vehicles': [{'cargo_capacity': 10,\n", " 'consumables': 'none',\n", " 'cost_in_credits': 'unknown',\n", " 'created': '2014-12-15T12:22:12Z',\n", " 'crew': 2,\n", " 'edited': '2014-12-22T18:21:15.623033Z',\n", " 'films': ['https://swapi.co/api/films/2/'],\n", " 'length': 4.5,\n", " 'manufacturer': 'Incom corporation',\n", " 'max_atmosphering_speed': 650,\n", " 'model': 't-47 airspeeder',\n", " 'name': 'Snowspeeder',\n", " 'passengers': 0,\n", " 'pilots': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/18/'],\n", " 'url': 'https://swapi.co/api/vehicles/14/',\n", " 'vehicle_class': 'airspeeder'},\n", " {'cargo_capacity': 4,\n", " 'consumables': '1 day',\n", " 'cost_in_credits': 8000,\n", " 'created': '2014-12-18T11:20:04.625000Z',\n", " 'crew': 1,\n", " 'edited': '2014-12-22T18:21:15.920537Z',\n", " 'films': ['https://swapi.co/api/films/3/'],\n", " 'length': 3,\n", " 'manufacturer': 'Aratech Repulsor Company',\n", " 'max_atmosphering_speed': 360,\n", " 'model': '74-Z speeder bike',\n", " 'name': 'Imperial Speeder Bike',\n", " 'passengers': 1,\n", " 'pilots': ['https://swapi.co/api/people/1/',\n", " 'https://swapi.co/api/people/5/'],\n", " 'url': 'https://swapi.co/api/vehicles/30/',\n", " 'vehicle_class': 'speeder'}]}\n" ] } ], "source": [ "pprint(luke)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Insertion" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Single inserts" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "result = people.insert_one(data)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['people']" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db.list_collection_names()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Bulk inserts" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "xs = [to_numeric(convert(requests.get('https://swapi.co/api/people/%i' % i).json())) \n", " for i in range(2, 11)]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true, "scrolled": false }, "outputs": [], "source": [ "result = people.insert_many(xs)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[ObjectId('5b8fc6062b84ad7967ef9949'),\n", " ObjectId('5b8fc6062b84ad7967ef994a'),\n", " ObjectId('5b8fc6062b84ad7967ef994b'),\n", " ObjectId('5b8fc6062b84ad7967ef994c'),\n", " ObjectId('5b8fc6062b84ad7967ef994d'),\n", " ObjectId('5b8fc6062b84ad7967ef994e'),\n", " ObjectId('5b8fc6062b84ad7967ef994f'),\n", " ObjectId('5b8fc6062b84ad7967ef9950'),\n", " ObjectId('5b8fc6062b84ad7967ef9951')]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.inserted_ids" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Queries" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'_id': ObjectId('5b8fc5ee2b84ad7967ef9948'),\n", " 'eye_color': 'blue',\n", " 'hair_color': 'blond',\n", " 'name': 'Luke Skywalker',\n", " 'skin_color': 'fair'}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "people.find_one(\n", " # search criteria\n", " {'name': 'Luke Skywalker'}, \n", " # values to return\n", " {'name': True, \n", " 'hair_color': True,\n", " 'skin_color': True, \n", " 'eye_color': True\n", " } \n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using object ID\n", "\n", "Note that ObjectID is NOT a string. You must convert a string to ObjectID before use." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ObjectId('5b8fc6062b84ad7967ef9949')" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.inserted_ids[0]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'_id': ObjectId('5b8fc6062b84ad7967ef9949'),\n", " 'eye_color': 'yellow',\n", " 'hair_color': 'n/a',\n", " 'name': 'C-3PO',\n", " 'skin_color': 'gold'}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "people.find_one(\n", " result.inserted_ids[0],\n", " {'name': True, 'hair_color': True, 'skin_color': True, 'eye_color': True}\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Bulk queries" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Luke Skywalker\n", "Darth Vader\n", "Owen Lars\n", "Biggs Darklighter\n", "Obi-Wan Kenobi\n" ] } ], "source": [ "for person in people.find({'gender': 'male'}):\n", " print(person['name'])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'name': 'Luke Skywalker', 'gender': 'male', '_id': ObjectId('5b8fc5ee2b84ad7967ef9948')}\n", "{'name': 'Darth Vader', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef994b')}\n", "{'name': 'Owen Lars', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef994d')}\n", "{'name': 'Biggs Darklighter', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef9950')}\n", "{'name': 'Obi-Wan Kenobi', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef9951')}\n" ] } ], "source": [ "for x in people.find(\n", " {'gender': 'male'}, \n", " {\n", " 'name': True,\n", " 'gender': True\n", " }\n", "): \n", " print(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Using regex search" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'gender': 'male', 'name': 'Luke Skywalker'}\n", "{'gender': 'female', 'name': 'Leia Organa'}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'name': {'$regex': '^L'},\n", " },\n", " {\n", " 'name': True, \n", " 'gender': True, \n", " '_id': False\n", " }\n", "):\n", " pprint(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Alternative using Python regular expressions." ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import re\n", "\n", "name_pat = re.compile(r'^l', re.IGNORECASE)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'gender': 'male', 'name': 'Luke Skywalker'}\n", "{'gender': 'female', 'name': 'Leia Organa'}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'name': name_pat,\n", " },\n", " {\n", " 'name': True,\n", " 'gender': True,\n", " '_id': False\n", " }\n", "):\n", " pprint(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Using relational operators" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'mass': 136, 'name': 'Darth Vader'}\n", "{'mass': 120, 'name': 'Owen Lars'}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'mass': {'$gt': 100},\n", " },\n", " {\n", " 'name': True, \n", " 'mass': True, \n", " '_id': False\n", " }\n", "):\n", " pprint(x)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mass_range = {'$lt': 100, '$gt': 50}" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'mass': 77, 'name': 'Luke Skywalker'}\n", "{'mass': 75, 'name': 'C-3PO'}\n", "{'mass': 75, 'name': 'Beru Whitesun lars'}\n", "{'mass': 84, 'name': 'Biggs Darklighter'}\n", "{'mass': 77, 'name': 'Obi-Wan Kenobi'}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'mass': mass_range,\n", " },\n", " {\n", " 'name': True, \n", " 'mass': True, \n", " '_id': False\n", " }\n", "):\n", " print(x)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'mass': 77, 'name': 'Luke Skywalker'}\n", "{'mass': 75, 'name': 'C-3PO'}\n", "{'mass': 75, 'name': 'Beru Whitesun lars'}\n", "{'mass': 84, 'name': 'Biggs Darklighter'}\n", "{'mass': 77, 'name': 'Obi-Wan Kenobi'}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'mass': mass_range,\n", " },\n", " {\n", " 'name': True, \n", " 'mass': True,\n", " '_id': False\n", " }\n", "):\n", " pprint(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Nested search" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'name': 'Luke Skywalker', 'species': [{'name': 'Human'}]}\n", "{'name': 'Darth Vader', 'species': [{'name': 'Human'}]}\n", "{'name': 'Leia Organa', 'species': [{'name': 'Human'}]}\n", "{'name': 'Owen Lars', 'species': [{'name': 'Human'}]}\n", "{'name': 'Beru Whitesun lars', 'species': [{'name': 'Human'}]}\n", "{'name': 'Biggs Darklighter', 'species': [{'name': 'Human'}]}\n", "{'name': 'Obi-Wan Kenobi', 'species': [{'name': 'Human'}]}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'species.name': 'Human',\n", " },\n", " {\n", " 'name': True, \n", " 'species.name': True, \n", " '_id': False\n", " }\n", "):\n", " pprint(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Matching multiple criteria" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'name': 'Luke Skywalker',\n", " 'starships': [{'cost_in_credits': 149999,\n", " 'max_atmosphering_speed': 1050,\n", " 'passengers': 0},\n", " {'cost_in_credits': 240000,\n", " 'max_atmosphering_speed': 850,\n", " 'passengers': 20}]}\n", "{'name': 'Obi-Wan Kenobi',\n", " 'starships': [{'cost_in_credits': 180000,\n", " 'max_atmosphering_speed': 1150,\n", " 'passengers': 0},\n", " {'cost_in_credits': 125000000,\n", " 'max_atmosphering_speed': 1050,\n", " 'passengers': 48247},\n", " {'cost_in_credits': 'unknown',\n", " 'max_atmosphering_speed': 1050,\n", " 'passengers': 3},\n", " {'cost_in_credits': 320000,\n", " 'max_atmosphering_speed': 1500,\n", " 'passengers': 0},\n", " {'cost_in_credits': 168000,\n", " 'max_atmosphering_speed': 1100,\n", " 'passengers': 0}]}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'starships.cost_in_credits': {'$lt': 250000},\n", " 'starships.max_atmosphering_speed': {'$gt': 500},\n", " 'starships.passengers': {'$gt': 0}\n", " },\n", " {\n", " 'name': True, \n", " 'starship.name': True, \n", " 'starships.max_atmosphering_speed': True,\n", " 'starships.passengers': True,\n", " 'starships.cost_in_credits': True, \n", " '_id': False\n", " }\n", "):\n", " pprint(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Matching multiple criteria simultaneously" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'name': 'Luke Skywalker',\n", " 'starships': [{'cost_in_credits': 149999,\n", " 'max_atmosphering_speed': 1050,\n", " 'passengers': 0},\n", " {'cost_in_credits': 240000,\n", " 'max_atmosphering_speed': 850,\n", " 'passengers': 20}]}\n" ] } ], "source": [ "for x in people.find(\n", " {\n", " 'starships': {\n", " '$elemMatch': { \n", " 'cost_in_credits': {'$lt': 250000},\n", " 'max_atmosphering_speed': {'$gt': 500},\n", " 'passengers': {'$gt': 1}\n", " }\n", " }\n", " },\n", " {\n", " 'name': True, \n", " 'starship.name': True, \n", " 'starships.max_atmosphering_speed': True,\n", " 'starships.passengers': True,\n", " 'starships.cost_in_credits': True, \n", " '_id': False\n", " }\n", "):\n", " pprint(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Indexes" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ ">" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "people.find({}).explain" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'executionStats': {'allPlansExecution': [],\n", " 'executionStages': {'advanced': 1,\n", " 'direction': 'forward',\n", " 'docsExamined': 10,\n", " 'executionTimeMillisEstimate': 0,\n", " 'filter': {'name': {'$eq': 'Luke Skywalker'}},\n", " 'invalidates': 0,\n", " 'isEOF': 1,\n", " 'nReturned': 1,\n", " 'needTime': 10,\n", " 'needYield': 0,\n", " 'restoreState': 0,\n", " 'saveState': 0,\n", " 'stage': 'COLLSCAN',\n", " 'works': 12},\n", " 'executionSuccess': True,\n", " 'executionTimeMillis': 0,\n", " 'nReturned': 1,\n", " 'totalDocsExamined': 10,\n", " 'totalKeysExamined': 0},\n", " 'ok': 1.0,\n", " 'queryPlanner': {'indexFilterSet': False,\n", " 'namespace': 'starwars.people',\n", " 'parsedQuery': {'name': {'$eq': 'Luke Skywalker'}},\n", " 'plannerVersion': 1,\n", " 'rejectedPlans': [],\n", " 'winningPlan': {'direction': 'forward',\n", " 'filter': {'name': {'$eq': 'Luke Skywalker'}},\n", " 'stage': 'COLLSCAN'}},\n", " 'serverInfo': {'gitVersion': 'fc1573ba18aee42f97a3bb13b67af7d837826b47',\n", " 'host': 'eris',\n", " 'port': 27017,\n", " 'version': '4.0.2'}}" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "people.find({'name': 'Luke Skywalker'}).explain()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'name_1'" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "people.create_index('name')" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'executionStats': {'allPlansExecution': [],\n", " 'executionStages': {'advanced': 1,\n", " 'alreadyHasObj': 0,\n", " 'docsExamined': 1,\n", " 'executionTimeMillisEstimate': 0,\n", " 'inputStage': {'advanced': 1,\n", " 'direction': 'forward',\n", " 'dupsDropped': 0,\n", " 'dupsTested': 0,\n", " 'executionTimeMillisEstimate': 0,\n", " 'indexBounds': {'name': ['[\"Luke Skywalker\", \"Luke Skywalker\"]']},\n", " 'indexName': 'name_1',\n", " 'indexVersion': 2,\n", " 'invalidates': 0,\n", " 'isEOF': 1,\n", " 'isMultiKey': False,\n", " 'isPartial': False,\n", " 'isSparse': False,\n", " 'isUnique': False,\n", " 'keyPattern': {'name': 1},\n", " 'keysExamined': 1,\n", " 'multiKeyPaths': {'name': []},\n", " 'nReturned': 1,\n", " 'needTime': 0,\n", " 'needYield': 0,\n", " 'restoreState': 0,\n", " 'saveState': 0,\n", " 'seeks': 1,\n", " 'seenInvalidated': 0,\n", " 'stage': 'IXSCAN',\n", " 'works': 2},\n", " 'invalidates': 0,\n", " 'isEOF': 1,\n", " 'nReturned': 1,\n", " 'needTime': 0,\n", " 'needYield': 0,\n", " 'restoreState': 0,\n", " 'saveState': 0,\n", " 'stage': 'FETCH',\n", " 'works': 2},\n", " 'executionSuccess': True,\n", " 'executionTimeMillis': 3,\n", " 'nReturned': 1,\n", " 'totalDocsExamined': 1,\n", " 'totalKeysExamined': 1},\n", " 'ok': 1.0,\n", " 'queryPlanner': {'indexFilterSet': False,\n", " 'namespace': 'starwars.people',\n", " 'parsedQuery': {'name': {'$eq': 'Luke Skywalker'}},\n", " 'plannerVersion': 1,\n", " 'rejectedPlans': [],\n", " 'winningPlan': {'inputStage': {'direction': 'forward',\n", " 'indexBounds': {'name': ['[\"Luke Skywalker\", \"Luke Skywalker\"]']},\n", " 'indexName': 'name_1',\n", " 'indexVersion': 2,\n", " 'isMultiKey': False,\n", " 'isPartial': False,\n", " 'isSparse': False,\n", " 'isUnique': False,\n", " 'keyPattern': {'name': 1},\n", " 'multiKeyPaths': {'name': []},\n", " 'stage': 'IXSCAN'},\n", " 'stage': 'FETCH'}},\n", " 'serverInfo': {'gitVersion': 'fc1573ba18aee42f97a3bb13b67af7d837826b47',\n", " 'host': 'eris',\n", " 'port': 27017,\n", " 'version': '4.0.2'}}" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "people.find({'name': 'Luke Skywalker'}).explain()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Aggregate Queries" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "people.count_documents({'species.name': 'Human'})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using aggregate\n", "\n", "The `aggregate` function runs a pipeline of commands, and uses the `$group` operator to summarize results." ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from collections import OrderedDict" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filter and count" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cmds = [\n", " {'$match': {'species.name': 'Human'}},\n", " {'$group': {'_id': '$species.name', 'count': {'$sum': 1}}},\n", "]" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'_id': ['Human'], 'count': 7}]\n" ] } ], "source": [ "pprint(list(people.aggregate(cmds)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filter and find total mass" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cmds = [\n", " {'$match': {'species.name': 'Human'}},\n", " {'$group': {'_id': '$species.name', 'total_mass': {'$sum': '$mass'}}},\n", "]" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'_id': ['Human'], 'total_mass': 618}]\n" ] } ], "source": [ "pprint(list(people.aggregate(cmds)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Total mass of each species" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cmds = [\n", " {'$group': {'_id': '$species.name', 'total_mass': {'$sum': '$mass'}}},\n", "]" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'_id': ['Droid'], 'total_mass': 139}, {'_id': ['Human'], 'total_mass': 618}]\n" ] } ], "source": [ "pprint(list(people.aggregate(cmds)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Total and average mass of each species" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cmds = [\n", " {\n", " '$group': {\n", " '_id': '$species.name',\n", " 'total_mass': {'$sum': '$mass'},\n", " 'avg_mass': {'$avg': '$mass'}\n", " }\n", " },\n", "]" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'_id': ['Droid'], 'avg_mass': 46.333333333333336, 'total_mass': 139},\n", " {'_id': ['Human'], 'avg_mass': 88.28571428571429, 'total_mass': 618}]\n" ] } ], "source": [ "pprint(list(people.aggregate(cmds)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using MapReduce\n", "\n", "With `MapReduce` you get the full power of JavaScript, but it is more complex and often less efficient. You should use `aggregate` in preference to `map_reduce` in most cases.\n", "\n", "- In the map stage, you create a (key, value) pair\n", "- In the reduce stage, you perform a reduction (e.g. sum) of the values associated with each key" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from bson.code import Code" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Count the number by eye_color." ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": true, "scrolled": false }, "outputs": [], "source": [ "mapper = Code('''\n", "function() {\n", " emit(this.eye_color, 1);\n", "}\n", "''')\n", "\n", "reducer = Code('''\n", "function (key, values) {\n", " var total = 0;\n", " for (var i = 0; i < values.length; i++) {\n", " total += values[i];\n", " }\n", " return total;\n", "}\n", "''')\n", "\n", "result = people.map_reduce(\n", " mapper, \n", " reducer, \n", " 'result1'\n", ")" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'_id': 'blue', 'value': 3.0}\n", "{'_id': 'blue-gray', 'value': 1.0}\n", "{'_id': 'brown', 'value': 2.0}\n", "{'_id': 'red', 'value': 2.0}\n", "{'_id': 'yellow', 'value': 2.0}\n" ] } ], "source": [ "for doc in result.find():\n", " pprint(doc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The output is also stored in the `result1` collection we specified." ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'_id': 'blue', 'value': 3.0},\n", " {'_id': 'blue-gray', 'value': 1.0},\n", " {'_id': 'brown', 'value': 2.0},\n", " {'_id': 'red', 'value': 2.0},\n", " {'_id': 'yellow', 'value': 2.0}]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(db.result1.find())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using JavaScript Array functions to simplify code." ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mapper = Code('''\n", "function() {\n", " emit(this.eye_color, 1);\n", "}\n", "''')\n", "\n", "reducer = Code('''\n", "function (key, values) {\n", " return Array.sum(values);\n", "}\n", "''')\n", "\n", "result = people.map_reduce(\n", " mapper, \n", " reducer, \n", " 'result2'\n", ")" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'_id': 'blue', 'value': 3.0}\n", "{'_id': 'blue-gray', 'value': 1.0}\n", "{'_id': 'brown', 'value': 2.0}\n", "{'_id': 'red', 'value': 2.0}\n", "{'_id': 'yellow', 'value': 2.0}\n" ] } ], "source": [ "for doc in result.find():\n", " pprint(doc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Find avergae mass by gender." ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mapper = Code('''\n", "function() {\n", " emit(this.gender, this.mass);\n", "}\n", "''')\n", "\n", "reducer = Code('''\n", "function (key, values) {\n", " return Array.avg(values);\n", "}\n", "''')\n", "\n", "result = people.map_reduce(\n", " mapper, \n", " reducer, \n", " 'result3'\n", ")" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'_id': 'female', 'value': 62.0}\n", "{'_id': 'male', 'value': 98.8}\n", "{'_id': 'n/a', 'value': 46.333333333333336}\n" ] } ], "source": [ "for doc in result.find():\n", " pprint(doc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Count number of members in each species" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mapper = Code('''\n", "function() {\n", " this.species.map(function(z) {\n", " emit(z.name, 1);\n", " })\n", "}\n", "''')\n", "\n", "reducer = Code('''\n", "function (key, values) {\n", " return Array.sum(values);\n", "}\n", "''')\n", "\n", "result = people.map_reduce(\n", " mapper, \n", " reducer, \n", " 'result3'\n", ")" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'_id': 'Droid', 'value': 3.0}\n", "{'_id': 'Human', 'value': 7.0}\n" ] } ], "source": [ "for doc in result.find():\n", " pprint(doc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Geospatial queries" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": true }, "outputs": [], "source": [ "crime = db.crime" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import json" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": true }, "outputs": [], "source": [ "path = 'data/crime-mapping.geojson'\n", "\n", "with open(path) as f:\n", " datastore = json.load(f)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": true }, "outputs": [], "source": [ "results = crime.insert_many(datastore['features'])" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'_id': ObjectId('5b8fc6472b84ad7967ef9952'),\n", " 'geometry': {'coordinates': [-78.78200313, 35.760212065], 'type': 'Point'},\n", " 'properties': {'activity_date': None,\n", " 'apartment_complex': None,\n", " 'beat_number': '112',\n", " 'chrgcnt': None,\n", " 'crime_category': 'ALL OTHER',\n", " 'crime_type': 'ALL OTHER - ESCAPE FROM CUSTODY OR RESIST ARREST',\n", " 'crimeday': 'THURSDAY',\n", " 'date_from': '2017-11-30',\n", " 'date_to': '11/30/2017',\n", " 'district': 'D3',\n", " 'domestic': 'N',\n", " 'incident_number': '17010528',\n", " 'lat': 35.760212065,\n", " 'location_category': 'TOWN OWNED',\n", " 'lon': -78.78200313,\n", " 'map_reference': 'P027',\n", " 'offensecategory': 'All Other Offenses',\n", " 'period': ['Everything', 'Last Year'],\n", " 'phxcommunity': 'No',\n", " 'phxrecordstatus': None,\n", " 'phxstatus': None,\n", " 'radio': 'Everything,Last Year',\n", " 'record': 3145,\n", " 'residential_subdivision': 'SHOPPES OF KILDAIRE',\n", " 'street': 'KILDAIRE FARM RD',\n", " 'time_from': -62135553600,\n", " 'time_to': -62135553600,\n", " 'timeframe': ['Last Year'],\n", " 'ucr': '2650',\n", " 'violentproperty': 'All Other'},\n", " 'type': 'Feature'}" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime.find_one({})" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'geometry': {'coordinates': [-78.78200313, 35.760212065], 'type': 'Point'}}" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime.find_one({},\n", " {\n", " 'geometry': 1,\n", " '_id': 0,\n", " }\n", " )" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'geometry_2dsphere'" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crime.create_index([('geometry', GEOSPHERE)])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "List 5 crimes near the location" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'properties': {'crime_type': 'ALL OTHER - ESCAPE FROM CUSTODY OR RESIST '\n", " 'ARREST',\n", " 'date_from': '2017-11-30'}}\n", "{'properties': {'crime_type': 'LARCENY - AUTO PARTS OR ACCESSORIES',\n", " 'date_from': '2018-03-20'}}\n", "{'properties': {'crime_type': 'COUNTERFEITING - USING',\n", " 'date_from': '2018-08-05'}}\n", "{'properties': {'crime_type': 'DRUGS - DRUG VIOLATIONS '\n", " '(POSS./SELL/MAN./DEL./TRNSPRT/CULT.)',\n", " 'date_from': '2017-11-30'}}\n", "{'properties': {'crime_type': 'VANDALISM - DAMAGE TO PROPERTY',\n", " 'date_from': '2018-03-26'}}\n" ] } ], "source": [ "loc = SON([('type', 'Point'), ('coordinates', [-78.78200313, 35.760212065])])\n", "\n", "for doc in crime.find(\n", " {\n", " 'geometry' : SON([('$near', {'$geometry' : loc})])\n", " },\n", " {\n", " '_id': 0,\n", " 'properties.crime_type': 1,\n", " 'properties.date_from': 1\n", " }\n", ").limit(5):\n", " pprint(doc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "List crimes committed nearby (within 200 m)" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'geometry': {'coordinates': [-78.78102423, 35.7607323]},\n", " 'properties': {'crime_type': 'ASSAULT - SIMPLE - ALL OTHER',\n", " 'date_from': '2018-02-14'}}\n", "{'geometry': {'coordinates': [-78.78131931, 35.761138061]},\n", " 'properties': {'crime_type': 'VANDALISM - GRAFFITI',\n", " 'date_from': '2018-07-20'}}\n", "{'geometry': {'coordinates': [-78.7827814, 35.759087052]},\n", " 'properties': {'crime_type': 'VANDALISM - GRAFFITI',\n", " 'date_from': '2018-07-29'}}\n" ] } ], "source": [ "loc = SON([('type', 'Point'), ('coordinates', [-78.78200313, 35.760212065])])\n", "\n", "for doc in crime.find(\n", " {\n", " 'geometry' : SON([('$geoNear', {'$geometry' : loc, '$minDistance': 1e-6, '$maxDistance': 200})]),\n", " },\n", " {\n", " '_id': 0,\n", " 'geometry.coordinates': 1,\n", " 'properties.crime_type': 1,\n", " 'properties.date_from': 1\n", " }\n", "):\n", " pprint(doc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" } }, "nbformat": 4, "nbformat_minor": 2 }