MongoDB

MongoDB is a document database. It stores JSON objects.

In [1]:
from pymongo import MongoClient, GEOSPHERE
from bson.objectid import ObjectId
from bson.son import SON
In [2]:
import requests
from bson import json_util
In [3]:
import collections
from pathlib import Path
In [4]:
from pprint import pprint

Set up

This connects to the MongoDB daemon

In [5]:
client = MongoClient()

This specifies the database. It does not matter if it does not exist.

In [6]:
client.drop_database('starwars')
In [7]:
db = client.starwars

This specifies a collection

In [8]:
people = db.people

Check what collections are in the database. Note that the people collection is only created when the first value is inserted.

In [9]:
db.list_collection_names()
Out[9]:
[]

Get Data

In [10]:
base_url = 'https://swapi.co/api'
In [11]:
resp = requests.get('https://swapi.co/api/people/1')
data = resp.json()
In [12]:
data
Out[12]:
{'birth_year': '19BBY',
 'created': '2014-12-09T13:50:51.644000Z',
 'edited': '2014-12-20T21:17:56.891000Z',
 'eye_color': 'blue',
 'films': ['https://swapi.co/api/films/2/',
  'https://swapi.co/api/films/6/',
  'https://swapi.co/api/films/3/',
  'https://swapi.co/api/films/1/',
  'https://swapi.co/api/films/7/'],
 'gender': 'male',
 'hair_color': 'blond',
 'height': '172',
 'homeworld': 'https://swapi.co/api/planets/1/',
 'mass': '77',
 'name': 'Luke Skywalker',
 'skin_color': 'fair',
 'species': ['https://swapi.co/api/species/1/'],
 'starships': ['https://swapi.co/api/starships/12/',
  'https://swapi.co/api/starships/22/'],
 'url': 'https://swapi.co/api/people/1/',
 'vehicles': ['https://swapi.co/api/vehicles/14/',
  'https://swapi.co/api/vehicles/30/']}
In [13]:
def convert(data):
    """Nest inner API calls one level dowwn."""
    for key, val in data.items():
        if isinstance(val, list):
            data[key] = [requests.get(value).json() for value in val
                         if isinstance(value, str) and value.startswith(base_url)]
        else:
            if isinstance(val, str) and val.startswith(base_url):
                data[key] = requests.get(val).json()
    return data
In [14]:
luke = convert(data)
In [15]:
def convert_str(s):
    if '.' in s:
        try:
            s = float(s)
        except:
            pass
    else:
        try:
            s = int(s)
        except:
            pass
    return s

def to_numeric(data):
    for key in data:
        val = data[key]
        if isinstance(val, str):
            data[key] = convert_str(val)
        elif isinstance(val, dict):
            for k, v in val.items():
                if isinstance(v, str):
                    val[k] = convert_str(v)
        elif isinstance(val, list):
            for i, item in enumerate(val):
                if isinstance(item, str):
                    data[key][i] = convert_str(item)
                elif isinstance(item, dict):
                    for k, v in item.items():
                        if isinstance(v, str):
                            data[key][i][k] = convert_str(v)
    return data
In [16]:
luke = to_numeric(luke)
In [17]:
pprint(luke)
{'birth_year': '19BBY',
 'created': '2014-12-09T13:50:51.644000Z',
 'edited': '2014-12-20T21:17:56.891000Z',
 'eye_color': 'blue',
 'films': [{'characters': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/2/',
                           'https://swapi.co/api/people/3/',
                           'https://swapi.co/api/people/4/',
                           'https://swapi.co/api/people/5/',
                           'https://swapi.co/api/people/10/',
                           'https://swapi.co/api/people/13/',
                           'https://swapi.co/api/people/14/',
                           'https://swapi.co/api/people/18/',
                           'https://swapi.co/api/people/20/',
                           'https://swapi.co/api/people/21/',
                           'https://swapi.co/api/people/22/',
                           'https://swapi.co/api/people/23/',
                           'https://swapi.co/api/people/24/',
                           'https://swapi.co/api/people/25/',
                           'https://swapi.co/api/people/26/'],
            'created': '2014-12-12T11:26:24.656000Z',
            'director': 'Irvin Kershner',
            'edited': '2017-04-19T10:57:29.544256Z',
            'episode_id': 5,
            'opening_crawl': 'It is a dark time for the\r\n'
                             'Rebellion. Although the Death\r\n'
                             'Star has been destroyed,\r\n'
                             'Imperial troops have driven the\r\n'
                             'Rebel forces from their hidden\r\n'
                             'base and pursued them across\r\n'
                             'the galaxy.\r\n'
                             '\r\n'
                             'Evading the dreaded Imperial\r\n'
                             'Starfleet, a group of freedom\r\n'
                             'fighters led by Luke Skywalker\r\n'
                             'has established a new secret\r\n'
                             'base on the remote ice world\r\n'
                             'of Hoth.\r\n'
                             '\r\n'
                             'The evil lord Darth Vader,\r\n'
                             'obsessed with finding young\r\n'
                             'Skywalker, has dispatched\r\n'
                             'thousands of remote probes into\r\n'
                             'the far reaches of space....',
            'planets': ['https://swapi.co/api/planets/4/',
                        'https://swapi.co/api/planets/5/',
                        'https://swapi.co/api/planets/6/',
                        'https://swapi.co/api/planets/27/'],
            'producer': 'Gary Kurtz, Rick McCallum',
            'release_date': '1980-05-17',
            'species': ['https://swapi.co/api/species/6/',
                        'https://swapi.co/api/species/7/',
                        'https://swapi.co/api/species/3/',
                        'https://swapi.co/api/species/2/',
                        'https://swapi.co/api/species/1/'],
            'starships': ['https://swapi.co/api/starships/15/',
                          'https://swapi.co/api/starships/10/',
                          'https://swapi.co/api/starships/11/',
                          'https://swapi.co/api/starships/12/',
                          'https://swapi.co/api/starships/21/',
                          'https://swapi.co/api/starships/22/',
                          'https://swapi.co/api/starships/23/',
                          'https://swapi.co/api/starships/3/',
                          'https://swapi.co/api/starships/17/'],
            'title': 'The Empire Strikes Back',
            'url': 'https://swapi.co/api/films/2/',
            'vehicles': ['https://swapi.co/api/vehicles/8/',
                         'https://swapi.co/api/vehicles/14/',
                         'https://swapi.co/api/vehicles/16/',
                         'https://swapi.co/api/vehicles/18/',
                         'https://swapi.co/api/vehicles/19/',
                         'https://swapi.co/api/vehicles/20/']},
           {'characters': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/2/',
                           'https://swapi.co/api/people/3/',
                           'https://swapi.co/api/people/4/',
                           'https://swapi.co/api/people/5/',
                           'https://swapi.co/api/people/6/',
                           'https://swapi.co/api/people/7/',
                           'https://swapi.co/api/people/10/',
                           'https://swapi.co/api/people/11/',
                           'https://swapi.co/api/people/12/',
                           'https://swapi.co/api/people/13/',
                           'https://swapi.co/api/people/20/',
                           'https://swapi.co/api/people/21/',
                           'https://swapi.co/api/people/33/',
                           'https://swapi.co/api/people/46/',
                           'https://swapi.co/api/people/51/',
                           'https://swapi.co/api/people/52/',
                           'https://swapi.co/api/people/53/',
                           'https://swapi.co/api/people/54/',
                           'https://swapi.co/api/people/55/',
                           'https://swapi.co/api/people/56/',
                           'https://swapi.co/api/people/58/',
                           'https://swapi.co/api/people/63/',
                           'https://swapi.co/api/people/64/',
                           'https://swapi.co/api/people/67/',
                           'https://swapi.co/api/people/68/',
                           'https://swapi.co/api/people/75/',
                           'https://swapi.co/api/people/78/',
                           'https://swapi.co/api/people/79/',
                           'https://swapi.co/api/people/80/',
                           'https://swapi.co/api/people/81/',
                           'https://swapi.co/api/people/82/',
                           'https://swapi.co/api/people/83/',
                           'https://swapi.co/api/people/35/'],
            'created': '2014-12-20T18:49:38.403000Z',
            'director': 'George Lucas',
            'edited': '2015-04-11T09:45:44.862122Z',
            'episode_id': 3,
            'opening_crawl': 'War! The Republic is crumbling\r\n'
                             'under attacks by the ruthless\r\n'
                             'Sith Lord, Count Dooku.\r\n'
                             'There are heroes on both sides.\r\n'
                             'Evil is everywhere.\r\n'
                             '\r\n'
                             'In a stunning move, the\r\n'
                             'fiendish droid leader, General\r\n'
                             'Grievous, has swept into the\r\n'
                             'Republic capital and kidnapped\r\n'
                             'Chancellor Palpatine, leader of\r\n'
                             'the Galactic Senate.\r\n'
                             '\r\n'
                             'As the Separatist Droid Army\r\n'
                             'attempts to flee the besieged\r\n'
                             'capital with their valuable\r\n'
                             'hostage, two Jedi Knights lead a\r\n'
                             'desperate mission to rescue the\r\n'
                             'captive Chancellor....',
            'planets': ['https://swapi.co/api/planets/2/',
                        'https://swapi.co/api/planets/5/',
                        'https://swapi.co/api/planets/8/',
                        'https://swapi.co/api/planets/9/',
                        'https://swapi.co/api/planets/12/',
                        'https://swapi.co/api/planets/13/',
                        'https://swapi.co/api/planets/14/',
                        'https://swapi.co/api/planets/15/',
                        'https://swapi.co/api/planets/16/',
                        'https://swapi.co/api/planets/17/',
                        'https://swapi.co/api/planets/18/',
                        'https://swapi.co/api/planets/19/',
                        'https://swapi.co/api/planets/1/'],
            'producer': 'Rick McCallum',
            'release_date': '2005-05-19',
            'species': ['https://swapi.co/api/species/19/',
                        'https://swapi.co/api/species/33/',
                        'https://swapi.co/api/species/2/',
                        'https://swapi.co/api/species/3/',
                        'https://swapi.co/api/species/36/',
                        'https://swapi.co/api/species/37/',
                        'https://swapi.co/api/species/6/',
                        'https://swapi.co/api/species/1/',
                        'https://swapi.co/api/species/34/',
                        'https://swapi.co/api/species/15/',
                        'https://swapi.co/api/species/35/',
                        'https://swapi.co/api/species/20/',
                        'https://swapi.co/api/species/23/',
                        'https://swapi.co/api/species/24/',
                        'https://swapi.co/api/species/25/',
                        'https://swapi.co/api/species/26/',
                        'https://swapi.co/api/species/27/',
                        'https://swapi.co/api/species/28/',
                        'https://swapi.co/api/species/29/',
                        'https://swapi.co/api/species/30/'],
            'starships': ['https://swapi.co/api/starships/48/',
                          'https://swapi.co/api/starships/59/',
                          'https://swapi.co/api/starships/61/',
                          'https://swapi.co/api/starships/32/',
                          'https://swapi.co/api/starships/63/',
                          'https://swapi.co/api/starships/64/',
                          'https://swapi.co/api/starships/65/',
                          'https://swapi.co/api/starships/66/',
                          'https://swapi.co/api/starships/74/',
                          'https://swapi.co/api/starships/75/',
                          'https://swapi.co/api/starships/2/',
                          'https://swapi.co/api/starships/68/'],
            'title': 'Revenge of the Sith',
            'url': 'https://swapi.co/api/films/6/',
            'vehicles': ['https://swapi.co/api/vehicles/33/',
                         'https://swapi.co/api/vehicles/50/',
                         'https://swapi.co/api/vehicles/53/',
                         'https://swapi.co/api/vehicles/56/',
                         'https://swapi.co/api/vehicles/60/',
                         'https://swapi.co/api/vehicles/62/',
                         'https://swapi.co/api/vehicles/67/',
                         'https://swapi.co/api/vehicles/69/',
                         'https://swapi.co/api/vehicles/70/',
                         'https://swapi.co/api/vehicles/71/',
                         'https://swapi.co/api/vehicles/72/',
                         'https://swapi.co/api/vehicles/73/',
                         'https://swapi.co/api/vehicles/76/']},
           {'characters': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/2/',
                           'https://swapi.co/api/people/3/',
                           'https://swapi.co/api/people/4/',
                           'https://swapi.co/api/people/5/',
                           'https://swapi.co/api/people/10/',
                           'https://swapi.co/api/people/13/',
                           'https://swapi.co/api/people/14/',
                           'https://swapi.co/api/people/16/',
                           'https://swapi.co/api/people/18/',
                           'https://swapi.co/api/people/20/',
                           'https://swapi.co/api/people/21/',
                           'https://swapi.co/api/people/22/',
                           'https://swapi.co/api/people/25/',
                           'https://swapi.co/api/people/27/',
                           'https://swapi.co/api/people/28/',
                           'https://swapi.co/api/people/29/',
                           'https://swapi.co/api/people/30/',
                           'https://swapi.co/api/people/31/',
                           'https://swapi.co/api/people/45/'],
            'created': '2014-12-18T10:39:33.255000Z',
            'director': 'Richard Marquand',
            'edited': '2015-04-11T09:46:05.220365Z',
            'episode_id': 6,
            'opening_crawl': 'Luke Skywalker has returned to\r\n'
                             'his home planet of Tatooine in\r\n'
                             'an attempt to rescue his\r\n'
                             'friend Han Solo from the\r\n'
                             'clutches of the vile gangster\r\n'
                             'Jabba the Hutt.\r\n'
                             '\r\n'
                             'Little does Luke know that the\r\n'
                             'GALACTIC EMPIRE has secretly\r\n'
                             'begun construction on a new\r\n'
                             'armored space station even\r\n'
                             'more powerful than the first\r\n'
                             'dreaded Death Star.\r\n'
                             '\r\n'
                             'When completed, this ultimate\r\n'
                             'weapon will spell certain doom\r\n'
                             'for the small band of rebels\r\n'
                             'struggling to restore freedom\r\n'
                             'to the galaxy...',
            'planets': ['https://swapi.co/api/planets/5/',
                        'https://swapi.co/api/planets/7/',
                        'https://swapi.co/api/planets/8/',
                        'https://swapi.co/api/planets/9/',
                        'https://swapi.co/api/planets/1/'],
            'producer': 'Howard G. Kazanjian, George Lucas, Rick McCallum',
            'release_date': '1983-05-25',
            'species': ['https://swapi.co/api/species/1/',
                        'https://swapi.co/api/species/2/',
                        'https://swapi.co/api/species/3/',
                        'https://swapi.co/api/species/5/',
                        'https://swapi.co/api/species/6/',
                        'https://swapi.co/api/species/8/',
                        'https://swapi.co/api/species/9/',
                        'https://swapi.co/api/species/10/',
                        'https://swapi.co/api/species/15/'],
            'starships': ['https://swapi.co/api/starships/15/',
                          'https://swapi.co/api/starships/10/',
                          'https://swapi.co/api/starships/11/',
                          'https://swapi.co/api/starships/12/',
                          'https://swapi.co/api/starships/22/',
                          'https://swapi.co/api/starships/23/',
                          'https://swapi.co/api/starships/27/',
                          'https://swapi.co/api/starships/28/',
                          'https://swapi.co/api/starships/29/',
                          'https://swapi.co/api/starships/3/',
                          'https://swapi.co/api/starships/17/',
                          'https://swapi.co/api/starships/2/'],
            'title': 'Return of the Jedi',
            'url': 'https://swapi.co/api/films/3/',
            'vehicles': ['https://swapi.co/api/vehicles/8/',
                         'https://swapi.co/api/vehicles/16/',
                         'https://swapi.co/api/vehicles/18/',
                         'https://swapi.co/api/vehicles/19/',
                         'https://swapi.co/api/vehicles/24/',
                         'https://swapi.co/api/vehicles/25/',
                         'https://swapi.co/api/vehicles/26/',
                         'https://swapi.co/api/vehicles/30/']},
           {'characters': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/2/',
                           'https://swapi.co/api/people/3/',
                           'https://swapi.co/api/people/4/',
                           'https://swapi.co/api/people/5/',
                           'https://swapi.co/api/people/6/',
                           'https://swapi.co/api/people/7/',
                           'https://swapi.co/api/people/8/',
                           'https://swapi.co/api/people/9/',
                           'https://swapi.co/api/people/10/',
                           'https://swapi.co/api/people/12/',
                           'https://swapi.co/api/people/13/',
                           'https://swapi.co/api/people/14/',
                           'https://swapi.co/api/people/15/',
                           'https://swapi.co/api/people/16/',
                           'https://swapi.co/api/people/18/',
                           'https://swapi.co/api/people/19/',
                           'https://swapi.co/api/people/81/'],
            'created': '2014-12-10T14:23:31.880000Z',
            'director': 'George Lucas',
            'edited': '2015-04-11T09:46:52.774897Z',
            'episode_id': 4,
            'opening_crawl': 'It is a period of civil war.\r\n'
                             'Rebel spaceships, striking\r\n'
                             'from a hidden base, have won\r\n'
                             'their first victory against\r\n'
                             'the evil Galactic Empire.\r\n'
                             '\r\n'
                             'During the battle, Rebel\r\n'
                             'spies managed to steal secret\r\n'
                             "plans to the Empire's\r\n"
                             'ultimate weapon, the DEATH\r\n'
                             'STAR, an armored space\r\n'
                             'station with enough power\r\n'
                             'to destroy an entire planet.\r\n'
                             '\r\n'
                             "Pursued by the Empire's\r\n"
                             'sinister agents, Princess\r\n'
                             'Leia races home aboard her\r\n'
                             'starship, custodian of the\r\n'
                             'stolen plans that can save her\r\n'
                             'people and restore\r\n'
                             'freedom to the galaxy....',
            'planets': ['https://swapi.co/api/planets/2/',
                        'https://swapi.co/api/planets/3/',
                        'https://swapi.co/api/planets/1/'],
            'producer': 'Gary Kurtz, Rick McCallum',
            'release_date': '1977-05-25',
            'species': ['https://swapi.co/api/species/5/',
                        'https://swapi.co/api/species/3/',
                        'https://swapi.co/api/species/2/',
                        'https://swapi.co/api/species/1/',
                        'https://swapi.co/api/species/4/'],
            'starships': ['https://swapi.co/api/starships/2/',
                          'https://swapi.co/api/starships/3/',
                          'https://swapi.co/api/starships/5/',
                          'https://swapi.co/api/starships/9/',
                          'https://swapi.co/api/starships/10/',
                          'https://swapi.co/api/starships/11/',
                          'https://swapi.co/api/starships/12/',
                          'https://swapi.co/api/starships/13/'],
            'title': 'A New Hope',
            'url': 'https://swapi.co/api/films/1/',
            'vehicles': ['https://swapi.co/api/vehicles/4/',
                         'https://swapi.co/api/vehicles/6/',
                         'https://swapi.co/api/vehicles/7/',
                         'https://swapi.co/api/vehicles/8/']},
           {'characters': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/3/',
                           'https://swapi.co/api/people/5/',
                           'https://swapi.co/api/people/13/',
                           'https://swapi.co/api/people/14/',
                           'https://swapi.co/api/people/27/',
                           'https://swapi.co/api/people/84/',
                           'https://swapi.co/api/people/85/',
                           'https://swapi.co/api/people/86/',
                           'https://swapi.co/api/people/87/',
                           'https://swapi.co/api/people/88/'],
            'created': '2015-04-17T06:51:30.504780Z',
            'director': 'J. J. Abrams',
            'edited': '2015-12-17T14:31:47.617768Z',
            'episode_id': 7,
            'opening_crawl': 'Luke Skywalker has vanished.\r\n'
                             'In his absence, the sinister\r\n'
                             'FIRST ORDER has risen from\r\n'
                             'the ashes of the Empire\r\n'
                             'and will not rest until\r\n'
                             'Skywalker, the last Jedi,\r\n'
                             'has been destroyed.\r\n'
                             ' \r\n'
                             'With the support of the\r\n'
                             'REPUBLIC, General Leia Organa\r\n'
                             'leads a brave RESISTANCE.\r\n'
                             'She is desperate to find her\r\n'
                             'brother Luke and gain his\r\n'
                             'help in restoring peace and\r\n'
                             'justice to the galaxy.\r\n'
                             ' \r\n'
                             'Leia has sent her most daring\r\n'
                             'pilot on a secret mission\r\n'
                             'to Jakku, where an old ally\r\n'
                             'has discovered a clue to\r\n'
                             "Luke's whereabouts....",
            'planets': ['https://swapi.co/api/planets/61/'],
            'producer': 'Kathleen Kennedy, J. J. Abrams, Bryan Burk',
            'release_date': '2015-12-11',
            'species': ['https://swapi.co/api/species/3/',
                        'https://swapi.co/api/species/2/',
                        'https://swapi.co/api/species/1/'],
            'starships': ['https://swapi.co/api/starships/77/',
                          'https://swapi.co/api/starships/10/'],
            'title': 'The Force Awakens',
            'url': 'https://swapi.co/api/films/7/',
            'vehicles': []}],
 'gender': 'male',
 'hair_color': 'blond',
 'height': 172,
 'homeworld': {'climate': 'arid',
               'created': '2014-12-09T13:50:49.641000Z',
               'diameter': 10465,
               'edited': '2014-12-21T20:48:04.175778Z',
               'films': ['https://swapi.co/api/films/5/',
                         'https://swapi.co/api/films/4/',
                         'https://swapi.co/api/films/6/',
                         'https://swapi.co/api/films/3/',
                         'https://swapi.co/api/films/1/'],
               'gravity': '1 standard',
               'name': 'Tatooine',
               'orbital_period': 304,
               'population': 200000,
               'residents': ['https://swapi.co/api/people/1/',
                             'https://swapi.co/api/people/2/',
                             'https://swapi.co/api/people/4/',
                             'https://swapi.co/api/people/6/',
                             'https://swapi.co/api/people/7/',
                             'https://swapi.co/api/people/8/',
                             'https://swapi.co/api/people/9/',
                             'https://swapi.co/api/people/11/',
                             'https://swapi.co/api/people/43/',
                             'https://swapi.co/api/people/62/'],
               'rotation_period': 23,
               'surface_water': 1,
               'terrain': 'desert',
               'url': 'https://swapi.co/api/planets/1/'},
 'mass': 77,
 'name': 'Luke Skywalker',
 'skin_color': 'fair',
 'species': [{'average_height': 180,
              'average_lifespan': 120,
              'classification': 'mammal',
              'created': '2014-12-10T13:52:11.567000Z',
              'designation': 'sentient',
              'edited': '2015-04-17T06:59:55.850671Z',
              'eye_colors': 'brown, blue, green, hazel, grey, amber',
              'films': ['https://swapi.co/api/films/2/',
                        'https://swapi.co/api/films/7/',
                        'https://swapi.co/api/films/5/',
                        'https://swapi.co/api/films/4/',
                        'https://swapi.co/api/films/6/',
                        'https://swapi.co/api/films/3/',
                        'https://swapi.co/api/films/1/'],
              'hair_colors': 'blonde, brown, black, red',
              'homeworld': 'https://swapi.co/api/planets/9/',
              'language': 'Galactic Basic',
              'name': 'Human',
              'people': ['https://swapi.co/api/people/1/',
                         'https://swapi.co/api/people/4/',
                         'https://swapi.co/api/people/5/',
                         'https://swapi.co/api/people/6/',
                         'https://swapi.co/api/people/7/',
                         'https://swapi.co/api/people/9/',
                         'https://swapi.co/api/people/10/',
                         'https://swapi.co/api/people/11/',
                         'https://swapi.co/api/people/12/',
                         'https://swapi.co/api/people/14/',
                         'https://swapi.co/api/people/18/',
                         'https://swapi.co/api/people/19/',
                         'https://swapi.co/api/people/21/',
                         'https://swapi.co/api/people/22/',
                         'https://swapi.co/api/people/25/',
                         'https://swapi.co/api/people/26/',
                         'https://swapi.co/api/people/28/',
                         'https://swapi.co/api/people/29/',
                         'https://swapi.co/api/people/32/',
                         'https://swapi.co/api/people/34/',
                         'https://swapi.co/api/people/43/',
                         'https://swapi.co/api/people/51/',
                         'https://swapi.co/api/people/60/',
                         'https://swapi.co/api/people/61/',
                         'https://swapi.co/api/people/62/',
                         'https://swapi.co/api/people/66/',
                         'https://swapi.co/api/people/67/',
                         'https://swapi.co/api/people/68/',
                         'https://swapi.co/api/people/69/',
                         'https://swapi.co/api/people/74/',
                         'https://swapi.co/api/people/81/',
                         'https://swapi.co/api/people/84/',
                         'https://swapi.co/api/people/85/',
                         'https://swapi.co/api/people/86/',
                         'https://swapi.co/api/people/35/'],
              'skin_colors': 'caucasian, black, asian, hispanic',
              'url': 'https://swapi.co/api/species/1/'}],
 'starships': [{'MGLT': 100,
                'cargo_capacity': 110,
                'consumables': '1 week',
                'cost_in_credits': 149999,
                'created': '2014-12-12T11:19:05.340000Z',
                'crew': 1,
                'edited': '2014-12-22T17:35:44.491233Z',
                'films': ['https://swapi.co/api/films/2/',
                          'https://swapi.co/api/films/3/',
                          'https://swapi.co/api/films/1/'],
                'hyperdrive_rating': 1.0,
                'length': 12.5,
                'manufacturer': 'Incom Corporation',
                'max_atmosphering_speed': 1050,
                'model': 'T-65 X-wing',
                'name': 'X-wing',
                'passengers': 0,
                'pilots': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/9/',
                           'https://swapi.co/api/people/18/',
                           'https://swapi.co/api/people/19/'],
                'starship_class': 'Starfighter',
                'url': 'https://swapi.co/api/starships/12/'},
               {'MGLT': 50,
                'cargo_capacity': 80000,
                'consumables': '2 months',
                'cost_in_credits': 240000,
                'created': '2014-12-15T13:04:47.235000Z',
                'crew': 6,
                'edited': '2014-12-22T17:35:44.795405Z',
                'films': ['https://swapi.co/api/films/2/',
                          'https://swapi.co/api/films/3/'],
                'hyperdrive_rating': 1.0,
                'length': 20,
                'manufacturer': 'Sienar Fleet Systems',
                'max_atmosphering_speed': 850,
                'model': 'Lambda-class T-4a shuttle',
                'name': 'Imperial shuttle',
                'passengers': 20,
                'pilots': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/13/',
                           'https://swapi.co/api/people/14/'],
                'starship_class': 'Armed government transport',
                'url': 'https://swapi.co/api/starships/22/'}],
 'url': {'birth_year': '19BBY',
         'created': '2014-12-09T13:50:51.644000Z',
         'edited': '2014-12-20T21:17:56.891000Z',
         'eye_color': 'blue',
         'films': ['https://swapi.co/api/films/2/',
                   'https://swapi.co/api/films/6/',
                   'https://swapi.co/api/films/3/',
                   'https://swapi.co/api/films/1/',
                   'https://swapi.co/api/films/7/'],
         'gender': 'male',
         'hair_color': 'blond',
         'height': 172,
         'homeworld': 'https://swapi.co/api/planets/1/',
         'mass': 77,
         'name': 'Luke Skywalker',
         'skin_color': 'fair',
         'species': ['https://swapi.co/api/species/1/'],
         'starships': ['https://swapi.co/api/starships/12/',
                       'https://swapi.co/api/starships/22/'],
         'url': 'https://swapi.co/api/people/1/',
         'vehicles': ['https://swapi.co/api/vehicles/14/',
                      'https://swapi.co/api/vehicles/30/']},
 'vehicles': [{'cargo_capacity': 10,
               'consumables': 'none',
               'cost_in_credits': 'unknown',
               'created': '2014-12-15T12:22:12Z',
               'crew': 2,
               'edited': '2014-12-22T18:21:15.623033Z',
               'films': ['https://swapi.co/api/films/2/'],
               'length': 4.5,
               'manufacturer': 'Incom corporation',
               'max_atmosphering_speed': 650,
               'model': 't-47 airspeeder',
               'name': 'Snowspeeder',
               'passengers': 0,
               'pilots': ['https://swapi.co/api/people/1/',
                          'https://swapi.co/api/people/18/'],
               'url': 'https://swapi.co/api/vehicles/14/',
               'vehicle_class': 'airspeeder'},
              {'cargo_capacity': 4,
               'consumables': '1 day',
               'cost_in_credits': 8000,
               'created': '2014-12-18T11:20:04.625000Z',
               'crew': 1,
               'edited': '2014-12-22T18:21:15.920537Z',
               'films': ['https://swapi.co/api/films/3/'],
               'length': 3,
               'manufacturer': 'Aratech Repulsor Company',
               'max_atmosphering_speed': 360,
               'model': '74-Z speeder bike',
               'name': 'Imperial Speeder Bike',
               'passengers': 1,
               'pilots': ['https://swapi.co/api/people/1/',
                          'https://swapi.co/api/people/5/'],
               'url': 'https://swapi.co/api/vehicles/30/',
               'vehicle_class': 'speeder'}]}

Insertion

Single inserts

In [18]:
result = people.insert_one(data)
In [19]:
db.list_collection_names()
Out[19]:
['people']

Bulk inserts

In [20]:
xs = [to_numeric(convert(requests.get('https://swapi.co/api/people/%i' % i).json()))
      for i in range(2, 11)]
In [21]:
result = people.insert_many(xs)
In [22]:
result.inserted_ids
Out[22]:
[ObjectId('5b8fc6062b84ad7967ef9949'),
 ObjectId('5b8fc6062b84ad7967ef994a'),
 ObjectId('5b8fc6062b84ad7967ef994b'),
 ObjectId('5b8fc6062b84ad7967ef994c'),
 ObjectId('5b8fc6062b84ad7967ef994d'),
 ObjectId('5b8fc6062b84ad7967ef994e'),
 ObjectId('5b8fc6062b84ad7967ef994f'),
 ObjectId('5b8fc6062b84ad7967ef9950'),
 ObjectId('5b8fc6062b84ad7967ef9951')]

Queries

In [23]:
people.find_one(
    # search criteria
    {'name': 'Luke Skywalker'},
    # values to return
    {'name': True,
     'hair_color': True,
     'skin_color': True,
     'eye_color': True
    }
)
Out[23]:
{'_id': ObjectId('5b8fc5ee2b84ad7967ef9948'),
 'eye_color': 'blue',
 'hair_color': 'blond',
 'name': 'Luke Skywalker',
 'skin_color': 'fair'}

Using object ID

Note that ObjectID is NOT a string. You must convert a string to ObjectID before use.

In [24]:
result.inserted_ids[0]
Out[24]:
ObjectId('5b8fc6062b84ad7967ef9949')
In [25]:
people.find_one(
    result.inserted_ids[0],
    {'name': True, 'hair_color': True, 'skin_color': True, 'eye_color': True}
)
Out[25]:
{'_id': ObjectId('5b8fc6062b84ad7967ef9949'),
 'eye_color': 'yellow',
 'hair_color': 'n/a',
 'name': 'C-3PO',
 'skin_color': 'gold'}

Bulk queries

In [26]:
for person in people.find({'gender': 'male'}):
    print(person['name'])
Luke Skywalker
Darth Vader
Owen Lars
Biggs Darklighter
Obi-Wan Kenobi
In [27]:
for x in people.find(
    {'gender': 'male'},
    {
        'name': True,
        'gender': True
    }
):
    print(x)
{'name': 'Luke Skywalker', 'gender': 'male', '_id': ObjectId('5b8fc5ee2b84ad7967ef9948')}
{'name': 'Darth Vader', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef994b')}
{'name': 'Owen Lars', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef994d')}
{'name': 'Biggs Darklighter', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef9950')}
{'name': 'Obi-Wan Kenobi', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef9951')}

Using relational operators

In [31]:
for x in people.find(
    {
        'mass': {'$gt': 100},
    },
    {
        'name': True,
        'mass': True,
        '_id': False
    }
):
    pprint(x)
{'mass': 136, 'name': 'Darth Vader'}
{'mass': 120, 'name': 'Owen Lars'}
In [32]:
mass_range = {'$lt': 100, '$gt': 50}
In [34]:
for x in people.find(
    {
        'mass': mass_range,
    },
    {
        'name': True,
        'mass': True,
        '_id': False
    }
):
    print(x)
{'mass': 77, 'name': 'Luke Skywalker'}
{'mass': 75, 'name': 'C-3PO'}
{'mass': 75, 'name': 'Beru Whitesun lars'}
{'mass': 84, 'name': 'Biggs Darklighter'}
{'mass': 77, 'name': 'Obi-Wan Kenobi'}
In [35]:
for x in people.find(
    {
        'mass': mass_range,
    },
    {
        'name': True,
        'mass': True,
        '_id': False
    }
):
    pprint(x)
{'mass': 77, 'name': 'Luke Skywalker'}
{'mass': 75, 'name': 'C-3PO'}
{'mass': 75, 'name': 'Beru Whitesun lars'}
{'mass': 84, 'name': 'Biggs Darklighter'}
{'mass': 77, 'name': 'Obi-Wan Kenobi'}

Matching multiple criteria

In [37]:
for x in people.find(
    {
        'starships.cost_in_credits': {'$lt': 250000},
        'starships.max_atmosphering_speed': {'$gt': 500},
        'starships.passengers': {'$gt': 0}
    },
    {
        'name': True,
        'starship.name': True,
        'starships.max_atmosphering_speed': True,
        'starships.passengers': True,
        'starships.cost_in_credits': True,
        '_id': False
    }
):
    pprint(x)
{'name': 'Luke Skywalker',
 'starships': [{'cost_in_credits': 149999,
                'max_atmosphering_speed': 1050,
                'passengers': 0},
               {'cost_in_credits': 240000,
                'max_atmosphering_speed': 850,
                'passengers': 20}]}
{'name': 'Obi-Wan Kenobi',
 'starships': [{'cost_in_credits': 180000,
                'max_atmosphering_speed': 1150,
                'passengers': 0},
               {'cost_in_credits': 125000000,
                'max_atmosphering_speed': 1050,
                'passengers': 48247},
               {'cost_in_credits': 'unknown',
                'max_atmosphering_speed': 1050,
                'passengers': 3},
               {'cost_in_credits': 320000,
                'max_atmosphering_speed': 1500,
                'passengers': 0},
               {'cost_in_credits': 168000,
                'max_atmosphering_speed': 1100,
                'passengers': 0}]}

Matching multiple criteria simultaneously

In [38]:
for x in people.find(
    {
        'starships': {
            '$elemMatch': {
                'cost_in_credits': {'$lt': 250000},
                'max_atmosphering_speed': {'$gt': 500},
                'passengers': {'$gt': 1}
            }
        }
    },
    {
        'name': True,
        'starship.name': True,
        'starships.max_atmosphering_speed': True,
        'starships.passengers': True,
        'starships.cost_in_credits': True,
        '_id': False
    }
):
    pprint(x)
{'name': 'Luke Skywalker',
 'starships': [{'cost_in_credits': 149999,
                'max_atmosphering_speed': 1050,
                'passengers': 0},
               {'cost_in_credits': 240000,
                'max_atmosphering_speed': 850,
                'passengers': 20}]}

Indexes

In [39]:
people.find({}).explain
Out[39]:
<bound method Cursor.explain of <pymongo.cursor.Cursor object at 0x104c2c9e8>>
In [40]:
people.find({'name': 'Luke Skywalker'}).explain()
Out[40]:
{'executionStats': {'allPlansExecution': [],
  'executionStages': {'advanced': 1,
   'direction': 'forward',
   'docsExamined': 10,
   'executionTimeMillisEstimate': 0,
   'filter': {'name': {'$eq': 'Luke Skywalker'}},
   'invalidates': 0,
   'isEOF': 1,
   'nReturned': 1,
   'needTime': 10,
   'needYield': 0,
   'restoreState': 0,
   'saveState': 0,
   'stage': 'COLLSCAN',
   'works': 12},
  'executionSuccess': True,
  'executionTimeMillis': 0,
  'nReturned': 1,
  'totalDocsExamined': 10,
  'totalKeysExamined': 0},
 'ok': 1.0,
 'queryPlanner': {'indexFilterSet': False,
  'namespace': 'starwars.people',
  'parsedQuery': {'name': {'$eq': 'Luke Skywalker'}},
  'plannerVersion': 1,
  'rejectedPlans': [],
  'winningPlan': {'direction': 'forward',
   'filter': {'name': {'$eq': 'Luke Skywalker'}},
   'stage': 'COLLSCAN'}},
 'serverInfo': {'gitVersion': 'fc1573ba18aee42f97a3bb13b67af7d837826b47',
  'host': 'eris',
  'port': 27017,
  'version': '4.0.2'}}
In [41]:
people.create_index('name')
Out[41]:
'name_1'
In [42]:
people.find({'name': 'Luke Skywalker'}).explain()
Out[42]:
{'executionStats': {'allPlansExecution': [],
  'executionStages': {'advanced': 1,
   'alreadyHasObj': 0,
   'docsExamined': 1,
   'executionTimeMillisEstimate': 0,
   'inputStage': {'advanced': 1,
    'direction': 'forward',
    'dupsDropped': 0,
    'dupsTested': 0,
    'executionTimeMillisEstimate': 0,
    'indexBounds': {'name': ['["Luke Skywalker", "Luke Skywalker"]']},
    'indexName': 'name_1',
    'indexVersion': 2,
    'invalidates': 0,
    'isEOF': 1,
    'isMultiKey': False,
    'isPartial': False,
    'isSparse': False,
    'isUnique': False,
    'keyPattern': {'name': 1},
    'keysExamined': 1,
    'multiKeyPaths': {'name': []},
    'nReturned': 1,
    'needTime': 0,
    'needYield': 0,
    'restoreState': 0,
    'saveState': 0,
    'seeks': 1,
    'seenInvalidated': 0,
    'stage': 'IXSCAN',
    'works': 2},
   'invalidates': 0,
   'isEOF': 1,
   'nReturned': 1,
   'needTime': 0,
   'needYield': 0,
   'restoreState': 0,
   'saveState': 0,
   'stage': 'FETCH',
   'works': 2},
  'executionSuccess': True,
  'executionTimeMillis': 3,
  'nReturned': 1,
  'totalDocsExamined': 1,
  'totalKeysExamined': 1},
 'ok': 1.0,
 'queryPlanner': {'indexFilterSet': False,
  'namespace': 'starwars.people',
  'parsedQuery': {'name': {'$eq': 'Luke Skywalker'}},
  'plannerVersion': 1,
  'rejectedPlans': [],
  'winningPlan': {'inputStage': {'direction': 'forward',
    'indexBounds': {'name': ['["Luke Skywalker", "Luke Skywalker"]']},
    'indexName': 'name_1',
    'indexVersion': 2,
    'isMultiKey': False,
    'isPartial': False,
    'isSparse': False,
    'isUnique': False,
    'keyPattern': {'name': 1},
    'multiKeyPaths': {'name': []},
    'stage': 'IXSCAN'},
   'stage': 'FETCH'}},
 'serverInfo': {'gitVersion': 'fc1573ba18aee42f97a3bb13b67af7d837826b47',
  'host': 'eris',
  'port': 27017,
  'version': '4.0.2'}}

Aggregate Queries

In [43]:
people.count_documents({'species.name': 'Human'})
Out[43]:
7

Using aggregate

The aggregate function runs a pipeline of commands, and uses the $group operator to summarize results.

In [44]:
from collections import OrderedDict

Filter and count

In [45]:
cmds = [
     {'$match': {'species.name': 'Human'}},
     {'$group': {'_id': '$species.name', 'count': {'$sum': 1}}},
]
In [46]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Human'], 'count': 7}]

Filter and find total mass

In [47]:
cmds = [
     {'$match': {'species.name': 'Human'}},
     {'$group': {'_id': '$species.name', 'total_mass': {'$sum': '$mass'}}},
]
In [48]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Human'], 'total_mass': 618}]

Total mass of each species

In [49]:
cmds = [
     {'$group': {'_id': '$species.name', 'total_mass': {'$sum': '$mass'}}},
]
In [50]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Droid'], 'total_mass': 139}, {'_id': ['Human'], 'total_mass': 618}]

Total and average mass of each species

In [51]:
cmds = [
     {
         '$group': {
             '_id': '$species.name',
             'total_mass': {'$sum': '$mass'},
             'avg_mass': {'$avg': '$mass'}
         }
     },
]
In [52]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Droid'], 'avg_mass': 46.333333333333336, 'total_mass': 139},
 {'_id': ['Human'], 'avg_mass': 88.28571428571429, 'total_mass': 618}]

Using MapReduce

With MapReduce you get the full power of JavaScript, but it is more complex and often less efficient. You should use aggregate in preference to map_reduce in most cases.

  • In the map stage, you create a (key, value) pair
  • In the reduce stage, you perform a reduction (e.g. sum) of the values associated with each key
In [53]:
from bson.code import Code

Count the number by eye_color.

In [54]:
mapper = Code('''
function() {
    emit(this.eye_color, 1);
}
''')

reducer = Code('''
function (key, values) {
    var total = 0;
    for (var i = 0; i < values.length; i++) {
        total += values[i];
    }
    return total;
}
''')

result = people.map_reduce(
    mapper,
    reducer,
    'result1'
)
In [55]:
for doc in result.find():
    pprint(doc)
{'_id': 'blue', 'value': 3.0}
{'_id': 'blue-gray', 'value': 1.0}
{'_id': 'brown', 'value': 2.0}
{'_id': 'red', 'value': 2.0}
{'_id': 'yellow', 'value': 2.0}

The output is also stored in the result1 collection we specified.

In [56]:
list(db.result1.find())
Out[56]:
[{'_id': 'blue', 'value': 3.0},
 {'_id': 'blue-gray', 'value': 1.0},
 {'_id': 'brown', 'value': 2.0},
 {'_id': 'red', 'value': 2.0},
 {'_id': 'yellow', 'value': 2.0}]

Using JavaScript Array functions to simplify code.

In [57]:
mapper = Code('''
function() {
    emit(this.eye_color, 1);
}
''')

reducer = Code('''
function (key, values) {
    return Array.sum(values);
}
''')

result = people.map_reduce(
    mapper,
    reducer,
    'result2'
)
In [58]:
for doc in result.find():
    pprint(doc)
{'_id': 'blue', 'value': 3.0}
{'_id': 'blue-gray', 'value': 1.0}
{'_id': 'brown', 'value': 2.0}
{'_id': 'red', 'value': 2.0}
{'_id': 'yellow', 'value': 2.0}

Find avergae mass by gender.

In [59]:
mapper = Code('''
function() {
    emit(this.gender, this.mass);
}
''')

reducer = Code('''
function (key, values) {
    return Array.avg(values);
}
''')

result = people.map_reduce(
    mapper,
    reducer,
    'result3'
)
In [60]:
for doc in result.find():
    pprint(doc)
{'_id': 'female', 'value': 62.0}
{'_id': 'male', 'value': 98.8}
{'_id': 'n/a', 'value': 46.333333333333336}

Count number of members in each species

In [61]:
mapper = Code('''
function() {
    this.species.map(function(z) {
      emit(z.name, 1);
    })
}
''')

reducer = Code('''
function (key, values) {
    return Array.sum(values);
}
''')

result = people.map_reduce(
    mapper,
    reducer,
    'result3'
)
In [62]:
for doc in result.find():
    pprint(doc)
{'_id': 'Droid', 'value': 3.0}
{'_id': 'Human', 'value': 7.0}

Geospatial queries

In [63]:
crime = db.crime
In [64]:
import json
In [65]:
path = 'data/crime-mapping.geojson'

with open(path) as f:
    datastore = json.load(f)
In [66]:
results = crime.insert_many(datastore['features'])
In [67]:
crime.find_one({})
Out[67]:
{'_id': ObjectId('5b8fc6472b84ad7967ef9952'),
 'geometry': {'coordinates': [-78.78200313, 35.760212065], 'type': 'Point'},
 'properties': {'activity_date': None,
  'apartment_complex': None,
  'beat_number': '112',
  'chrgcnt': None,
  'crime_category': 'ALL OTHER',
  'crime_type': 'ALL OTHER - ESCAPE FROM CUSTODY OR RESIST ARREST',
  'crimeday': 'THURSDAY',
  'date_from': '2017-11-30',
  'date_to': '11/30/2017',
  'district': 'D3',
  'domestic': 'N',
  'incident_number': '17010528',
  'lat': 35.760212065,
  'location_category': 'TOWN OWNED',
  'lon': -78.78200313,
  'map_reference': 'P027',
  'offensecategory': 'All Other Offenses',
  'period': ['Everything', 'Last Year'],
  'phxcommunity': 'No',
  'phxrecordstatus': None,
  'phxstatus': None,
  'radio': 'Everything,Last Year',
  'record': 3145,
  'residential_subdivision': 'SHOPPES OF KILDAIRE',
  'street': 'KILDAIRE FARM RD',
  'time_from': -62135553600,
  'time_to': -62135553600,
  'timeframe': ['Last Year'],
  'ucr': '2650',
  'violentproperty': 'All Other'},
 'type': 'Feature'}
In [68]:
crime.find_one({},
              {
                  'geometry': 1,
                  '_id': 0,
              }
              )
Out[68]:
{'geometry': {'coordinates': [-78.78200313, 35.760212065], 'type': 'Point'}}
In [69]:
crime.create_index([('geometry', GEOSPHERE)])
Out[69]:
'geometry_2dsphere'

List 5 crimes near the location

In [70]:
loc = SON([('type', 'Point'), ('coordinates', [-78.78200313, 35.760212065])])

for doc in crime.find(
    {
        'geometry' : SON([('$near', {'$geometry' : loc})])
    },
    {
        '_id': 0,
        'properties.crime_type': 1,
        'properties.date_from': 1
    }
).limit(5):
    pprint(doc)
{'properties': {'crime_type': 'ALL OTHER - ESCAPE FROM CUSTODY OR RESIST '
                              'ARREST',
                'date_from': '2017-11-30'}}
{'properties': {'crime_type': 'LARCENY - AUTO PARTS OR ACCESSORIES',
                'date_from': '2018-03-20'}}
{'properties': {'crime_type': 'COUNTERFEITING - USING',
                'date_from': '2018-08-05'}}
{'properties': {'crime_type': 'DRUGS - DRUG VIOLATIONS '
                              '(POSS./SELL/MAN./DEL./TRNSPRT/CULT.)',
                'date_from': '2017-11-30'}}
{'properties': {'crime_type': 'VANDALISM - DAMAGE TO PROPERTY',
                'date_from': '2018-03-26'}}

List crimes committed nearby (within 200 m)

In [71]:
loc = SON([('type', 'Point'), ('coordinates', [-78.78200313, 35.760212065])])

for doc in crime.find(
    {
        'geometry' : SON([('$geoNear', {'$geometry' : loc, '$minDistance': 1e-6, '$maxDistance': 200})]),
    },
    {
        '_id': 0,
        'geometry.coordinates': 1,
        'properties.crime_type': 1,
        'properties.date_from': 1
    }
):
    pprint(doc)
{'geometry': {'coordinates': [-78.78102423, 35.7607323]},
 'properties': {'crime_type': 'ASSAULT - SIMPLE - ALL OTHER',
                'date_from': '2018-02-14'}}
{'geometry': {'coordinates': [-78.78131931, 35.761138061]},
 'properties': {'crime_type': 'VANDALISM - GRAFFITI',
                'date_from': '2018-07-20'}}
{'geometry': {'coordinates': [-78.7827814, 35.759087052]},
 'properties': {'crime_type': 'VANDALISM - GRAFFITI',
                'date_from': '2018-07-29'}}
In [ ]: