MongoDB¶
MongoDB is a document database. It stores JSON objects.
In [1]:
from pymongo import MongoClient, GEOSPHERE
from bson.objectid import ObjectId
from bson.son import SON
In [2]:
import requests
from bson import json_util
In [3]:
import collections
from pathlib import Path
In [4]:
from pprint import pprint
Set up¶
This connects to the MongoDB daemon
In [5]:
client = MongoClient()
This specifies the database. It does not matter if it does not exist.
In [6]:
client.drop_database('starwars')
In [7]:
db = client.starwars
This specifies a collection
In [8]:
people = db.people
Check what collections are in the database. Note that the people
collection is only created when the first value is inserted.
In [9]:
db.list_collection_names()
Out[9]:
[]
Get Data¶
In [10]:
base_url = 'https://swapi.co/api'
In [11]:
resp = requests.get('https://swapi.co/api/people/1')
data = resp.json()
In [12]:
data
Out[12]:
{'birth_year': '19BBY',
'created': '2014-12-09T13:50:51.644000Z',
'edited': '2014-12-20T21:17:56.891000Z',
'eye_color': 'blue',
'films': ['https://swapi.co/api/films/2/',
'https://swapi.co/api/films/6/',
'https://swapi.co/api/films/3/',
'https://swapi.co/api/films/1/',
'https://swapi.co/api/films/7/'],
'gender': 'male',
'hair_color': 'blond',
'height': '172',
'homeworld': 'https://swapi.co/api/planets/1/',
'mass': '77',
'name': 'Luke Skywalker',
'skin_color': 'fair',
'species': ['https://swapi.co/api/species/1/'],
'starships': ['https://swapi.co/api/starships/12/',
'https://swapi.co/api/starships/22/'],
'url': 'https://swapi.co/api/people/1/',
'vehicles': ['https://swapi.co/api/vehicles/14/',
'https://swapi.co/api/vehicles/30/']}
In [13]:
def convert(data):
"""Nest inner API calls one level dowwn."""
for key, val in data.items():
if isinstance(val, list):
data[key] = [requests.get(value).json() for value in val
if isinstance(value, str) and value.startswith(base_url)]
else:
if isinstance(val, str) and val.startswith(base_url):
data[key] = requests.get(val).json()
return data
In [14]:
luke = convert(data)
In [15]:
def convert_str(s):
if '.' in s:
try:
s = float(s)
except:
pass
else:
try:
s = int(s)
except:
pass
return s
def to_numeric(data):
for key in data:
val = data[key]
if isinstance(val, str):
data[key] = convert_str(val)
elif isinstance(val, dict):
for k, v in val.items():
if isinstance(v, str):
val[k] = convert_str(v)
elif isinstance(val, list):
for i, item in enumerate(val):
if isinstance(item, str):
data[key][i] = convert_str(item)
elif isinstance(item, dict):
for k, v in item.items():
if isinstance(v, str):
data[key][i][k] = convert_str(v)
return data
In [16]:
luke = to_numeric(luke)
In [17]:
pprint(luke)
{'birth_year': '19BBY',
'created': '2014-12-09T13:50:51.644000Z',
'edited': '2014-12-20T21:17:56.891000Z',
'eye_color': 'blue',
'films': [{'characters': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/2/',
'https://swapi.co/api/people/3/',
'https://swapi.co/api/people/4/',
'https://swapi.co/api/people/5/',
'https://swapi.co/api/people/10/',
'https://swapi.co/api/people/13/',
'https://swapi.co/api/people/14/',
'https://swapi.co/api/people/18/',
'https://swapi.co/api/people/20/',
'https://swapi.co/api/people/21/',
'https://swapi.co/api/people/22/',
'https://swapi.co/api/people/23/',
'https://swapi.co/api/people/24/',
'https://swapi.co/api/people/25/',
'https://swapi.co/api/people/26/'],
'created': '2014-12-12T11:26:24.656000Z',
'director': 'Irvin Kershner',
'edited': '2017-04-19T10:57:29.544256Z',
'episode_id': 5,
'opening_crawl': 'It is a dark time for the\r\n'
'Rebellion. Although the Death\r\n'
'Star has been destroyed,\r\n'
'Imperial troops have driven the\r\n'
'Rebel forces from their hidden\r\n'
'base and pursued them across\r\n'
'the galaxy.\r\n'
'\r\n'
'Evading the dreaded Imperial\r\n'
'Starfleet, a group of freedom\r\n'
'fighters led by Luke Skywalker\r\n'
'has established a new secret\r\n'
'base on the remote ice world\r\n'
'of Hoth.\r\n'
'\r\n'
'The evil lord Darth Vader,\r\n'
'obsessed with finding young\r\n'
'Skywalker, has dispatched\r\n'
'thousands of remote probes into\r\n'
'the far reaches of space....',
'planets': ['https://swapi.co/api/planets/4/',
'https://swapi.co/api/planets/5/',
'https://swapi.co/api/planets/6/',
'https://swapi.co/api/planets/27/'],
'producer': 'Gary Kurtz, Rick McCallum',
'release_date': '1980-05-17',
'species': ['https://swapi.co/api/species/6/',
'https://swapi.co/api/species/7/',
'https://swapi.co/api/species/3/',
'https://swapi.co/api/species/2/',
'https://swapi.co/api/species/1/'],
'starships': ['https://swapi.co/api/starships/15/',
'https://swapi.co/api/starships/10/',
'https://swapi.co/api/starships/11/',
'https://swapi.co/api/starships/12/',
'https://swapi.co/api/starships/21/',
'https://swapi.co/api/starships/22/',
'https://swapi.co/api/starships/23/',
'https://swapi.co/api/starships/3/',
'https://swapi.co/api/starships/17/'],
'title': 'The Empire Strikes Back',
'url': 'https://swapi.co/api/films/2/',
'vehicles': ['https://swapi.co/api/vehicles/8/',
'https://swapi.co/api/vehicles/14/',
'https://swapi.co/api/vehicles/16/',
'https://swapi.co/api/vehicles/18/',
'https://swapi.co/api/vehicles/19/',
'https://swapi.co/api/vehicles/20/']},
{'characters': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/2/',
'https://swapi.co/api/people/3/',
'https://swapi.co/api/people/4/',
'https://swapi.co/api/people/5/',
'https://swapi.co/api/people/6/',
'https://swapi.co/api/people/7/',
'https://swapi.co/api/people/10/',
'https://swapi.co/api/people/11/',
'https://swapi.co/api/people/12/',
'https://swapi.co/api/people/13/',
'https://swapi.co/api/people/20/',
'https://swapi.co/api/people/21/',
'https://swapi.co/api/people/33/',
'https://swapi.co/api/people/46/',
'https://swapi.co/api/people/51/',
'https://swapi.co/api/people/52/',
'https://swapi.co/api/people/53/',
'https://swapi.co/api/people/54/',
'https://swapi.co/api/people/55/',
'https://swapi.co/api/people/56/',
'https://swapi.co/api/people/58/',
'https://swapi.co/api/people/63/',
'https://swapi.co/api/people/64/',
'https://swapi.co/api/people/67/',
'https://swapi.co/api/people/68/',
'https://swapi.co/api/people/75/',
'https://swapi.co/api/people/78/',
'https://swapi.co/api/people/79/',
'https://swapi.co/api/people/80/',
'https://swapi.co/api/people/81/',
'https://swapi.co/api/people/82/',
'https://swapi.co/api/people/83/',
'https://swapi.co/api/people/35/'],
'created': '2014-12-20T18:49:38.403000Z',
'director': 'George Lucas',
'edited': '2015-04-11T09:45:44.862122Z',
'episode_id': 3,
'opening_crawl': 'War! The Republic is crumbling\r\n'
'under attacks by the ruthless\r\n'
'Sith Lord, Count Dooku.\r\n'
'There are heroes on both sides.\r\n'
'Evil is everywhere.\r\n'
'\r\n'
'In a stunning move, the\r\n'
'fiendish droid leader, General\r\n'
'Grievous, has swept into the\r\n'
'Republic capital and kidnapped\r\n'
'Chancellor Palpatine, leader of\r\n'
'the Galactic Senate.\r\n'
'\r\n'
'As the Separatist Droid Army\r\n'
'attempts to flee the besieged\r\n'
'capital with their valuable\r\n'
'hostage, two Jedi Knights lead a\r\n'
'desperate mission to rescue the\r\n'
'captive Chancellor....',
'planets': ['https://swapi.co/api/planets/2/',
'https://swapi.co/api/planets/5/',
'https://swapi.co/api/planets/8/',
'https://swapi.co/api/planets/9/',
'https://swapi.co/api/planets/12/',
'https://swapi.co/api/planets/13/',
'https://swapi.co/api/planets/14/',
'https://swapi.co/api/planets/15/',
'https://swapi.co/api/planets/16/',
'https://swapi.co/api/planets/17/',
'https://swapi.co/api/planets/18/',
'https://swapi.co/api/planets/19/',
'https://swapi.co/api/planets/1/'],
'producer': 'Rick McCallum',
'release_date': '2005-05-19',
'species': ['https://swapi.co/api/species/19/',
'https://swapi.co/api/species/33/',
'https://swapi.co/api/species/2/',
'https://swapi.co/api/species/3/',
'https://swapi.co/api/species/36/',
'https://swapi.co/api/species/37/',
'https://swapi.co/api/species/6/',
'https://swapi.co/api/species/1/',
'https://swapi.co/api/species/34/',
'https://swapi.co/api/species/15/',
'https://swapi.co/api/species/35/',
'https://swapi.co/api/species/20/',
'https://swapi.co/api/species/23/',
'https://swapi.co/api/species/24/',
'https://swapi.co/api/species/25/',
'https://swapi.co/api/species/26/',
'https://swapi.co/api/species/27/',
'https://swapi.co/api/species/28/',
'https://swapi.co/api/species/29/',
'https://swapi.co/api/species/30/'],
'starships': ['https://swapi.co/api/starships/48/',
'https://swapi.co/api/starships/59/',
'https://swapi.co/api/starships/61/',
'https://swapi.co/api/starships/32/',
'https://swapi.co/api/starships/63/',
'https://swapi.co/api/starships/64/',
'https://swapi.co/api/starships/65/',
'https://swapi.co/api/starships/66/',
'https://swapi.co/api/starships/74/',
'https://swapi.co/api/starships/75/',
'https://swapi.co/api/starships/2/',
'https://swapi.co/api/starships/68/'],
'title': 'Revenge of the Sith',
'url': 'https://swapi.co/api/films/6/',
'vehicles': ['https://swapi.co/api/vehicles/33/',
'https://swapi.co/api/vehicles/50/',
'https://swapi.co/api/vehicles/53/',
'https://swapi.co/api/vehicles/56/',
'https://swapi.co/api/vehicles/60/',
'https://swapi.co/api/vehicles/62/',
'https://swapi.co/api/vehicles/67/',
'https://swapi.co/api/vehicles/69/',
'https://swapi.co/api/vehicles/70/',
'https://swapi.co/api/vehicles/71/',
'https://swapi.co/api/vehicles/72/',
'https://swapi.co/api/vehicles/73/',
'https://swapi.co/api/vehicles/76/']},
{'characters': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/2/',
'https://swapi.co/api/people/3/',
'https://swapi.co/api/people/4/',
'https://swapi.co/api/people/5/',
'https://swapi.co/api/people/10/',
'https://swapi.co/api/people/13/',
'https://swapi.co/api/people/14/',
'https://swapi.co/api/people/16/',
'https://swapi.co/api/people/18/',
'https://swapi.co/api/people/20/',
'https://swapi.co/api/people/21/',
'https://swapi.co/api/people/22/',
'https://swapi.co/api/people/25/',
'https://swapi.co/api/people/27/',
'https://swapi.co/api/people/28/',
'https://swapi.co/api/people/29/',
'https://swapi.co/api/people/30/',
'https://swapi.co/api/people/31/',
'https://swapi.co/api/people/45/'],
'created': '2014-12-18T10:39:33.255000Z',
'director': 'Richard Marquand',
'edited': '2015-04-11T09:46:05.220365Z',
'episode_id': 6,
'opening_crawl': 'Luke Skywalker has returned to\r\n'
'his home planet of Tatooine in\r\n'
'an attempt to rescue his\r\n'
'friend Han Solo from the\r\n'
'clutches of the vile gangster\r\n'
'Jabba the Hutt.\r\n'
'\r\n'
'Little does Luke know that the\r\n'
'GALACTIC EMPIRE has secretly\r\n'
'begun construction on a new\r\n'
'armored space station even\r\n'
'more powerful than the first\r\n'
'dreaded Death Star.\r\n'
'\r\n'
'When completed, this ultimate\r\n'
'weapon will spell certain doom\r\n'
'for the small band of rebels\r\n'
'struggling to restore freedom\r\n'
'to the galaxy...',
'planets': ['https://swapi.co/api/planets/5/',
'https://swapi.co/api/planets/7/',
'https://swapi.co/api/planets/8/',
'https://swapi.co/api/planets/9/',
'https://swapi.co/api/planets/1/'],
'producer': 'Howard G. Kazanjian, George Lucas, Rick McCallum',
'release_date': '1983-05-25',
'species': ['https://swapi.co/api/species/1/',
'https://swapi.co/api/species/2/',
'https://swapi.co/api/species/3/',
'https://swapi.co/api/species/5/',
'https://swapi.co/api/species/6/',
'https://swapi.co/api/species/8/',
'https://swapi.co/api/species/9/',
'https://swapi.co/api/species/10/',
'https://swapi.co/api/species/15/'],
'starships': ['https://swapi.co/api/starships/15/',
'https://swapi.co/api/starships/10/',
'https://swapi.co/api/starships/11/',
'https://swapi.co/api/starships/12/',
'https://swapi.co/api/starships/22/',
'https://swapi.co/api/starships/23/',
'https://swapi.co/api/starships/27/',
'https://swapi.co/api/starships/28/',
'https://swapi.co/api/starships/29/',
'https://swapi.co/api/starships/3/',
'https://swapi.co/api/starships/17/',
'https://swapi.co/api/starships/2/'],
'title': 'Return of the Jedi',
'url': 'https://swapi.co/api/films/3/',
'vehicles': ['https://swapi.co/api/vehicles/8/',
'https://swapi.co/api/vehicles/16/',
'https://swapi.co/api/vehicles/18/',
'https://swapi.co/api/vehicles/19/',
'https://swapi.co/api/vehicles/24/',
'https://swapi.co/api/vehicles/25/',
'https://swapi.co/api/vehicles/26/',
'https://swapi.co/api/vehicles/30/']},
{'characters': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/2/',
'https://swapi.co/api/people/3/',
'https://swapi.co/api/people/4/',
'https://swapi.co/api/people/5/',
'https://swapi.co/api/people/6/',
'https://swapi.co/api/people/7/',
'https://swapi.co/api/people/8/',
'https://swapi.co/api/people/9/',
'https://swapi.co/api/people/10/',
'https://swapi.co/api/people/12/',
'https://swapi.co/api/people/13/',
'https://swapi.co/api/people/14/',
'https://swapi.co/api/people/15/',
'https://swapi.co/api/people/16/',
'https://swapi.co/api/people/18/',
'https://swapi.co/api/people/19/',
'https://swapi.co/api/people/81/'],
'created': '2014-12-10T14:23:31.880000Z',
'director': 'George Lucas',
'edited': '2015-04-11T09:46:52.774897Z',
'episode_id': 4,
'opening_crawl': 'It is a period of civil war.\r\n'
'Rebel spaceships, striking\r\n'
'from a hidden base, have won\r\n'
'their first victory against\r\n'
'the evil Galactic Empire.\r\n'
'\r\n'
'During the battle, Rebel\r\n'
'spies managed to steal secret\r\n'
"plans to the Empire's\r\n"
'ultimate weapon, the DEATH\r\n'
'STAR, an armored space\r\n'
'station with enough power\r\n'
'to destroy an entire planet.\r\n'
'\r\n'
"Pursued by the Empire's\r\n"
'sinister agents, Princess\r\n'
'Leia races home aboard her\r\n'
'starship, custodian of the\r\n'
'stolen plans that can save her\r\n'
'people and restore\r\n'
'freedom to the galaxy....',
'planets': ['https://swapi.co/api/planets/2/',
'https://swapi.co/api/planets/3/',
'https://swapi.co/api/planets/1/'],
'producer': 'Gary Kurtz, Rick McCallum',
'release_date': '1977-05-25',
'species': ['https://swapi.co/api/species/5/',
'https://swapi.co/api/species/3/',
'https://swapi.co/api/species/2/',
'https://swapi.co/api/species/1/',
'https://swapi.co/api/species/4/'],
'starships': ['https://swapi.co/api/starships/2/',
'https://swapi.co/api/starships/3/',
'https://swapi.co/api/starships/5/',
'https://swapi.co/api/starships/9/',
'https://swapi.co/api/starships/10/',
'https://swapi.co/api/starships/11/',
'https://swapi.co/api/starships/12/',
'https://swapi.co/api/starships/13/'],
'title': 'A New Hope',
'url': 'https://swapi.co/api/films/1/',
'vehicles': ['https://swapi.co/api/vehicles/4/',
'https://swapi.co/api/vehicles/6/',
'https://swapi.co/api/vehicles/7/',
'https://swapi.co/api/vehicles/8/']},
{'characters': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/3/',
'https://swapi.co/api/people/5/',
'https://swapi.co/api/people/13/',
'https://swapi.co/api/people/14/',
'https://swapi.co/api/people/27/',
'https://swapi.co/api/people/84/',
'https://swapi.co/api/people/85/',
'https://swapi.co/api/people/86/',
'https://swapi.co/api/people/87/',
'https://swapi.co/api/people/88/'],
'created': '2015-04-17T06:51:30.504780Z',
'director': 'J. J. Abrams',
'edited': '2015-12-17T14:31:47.617768Z',
'episode_id': 7,
'opening_crawl': 'Luke Skywalker has vanished.\r\n'
'In his absence, the sinister\r\n'
'FIRST ORDER has risen from\r\n'
'the ashes of the Empire\r\n'
'and will not rest until\r\n'
'Skywalker, the last Jedi,\r\n'
'has been destroyed.\r\n'
' \r\n'
'With the support of the\r\n'
'REPUBLIC, General Leia Organa\r\n'
'leads a brave RESISTANCE.\r\n'
'She is desperate to find her\r\n'
'brother Luke and gain his\r\n'
'help in restoring peace and\r\n'
'justice to the galaxy.\r\n'
' \r\n'
'Leia has sent her most daring\r\n'
'pilot on a secret mission\r\n'
'to Jakku, where an old ally\r\n'
'has discovered a clue to\r\n'
"Luke's whereabouts....",
'planets': ['https://swapi.co/api/planets/61/'],
'producer': 'Kathleen Kennedy, J. J. Abrams, Bryan Burk',
'release_date': '2015-12-11',
'species': ['https://swapi.co/api/species/3/',
'https://swapi.co/api/species/2/',
'https://swapi.co/api/species/1/'],
'starships': ['https://swapi.co/api/starships/77/',
'https://swapi.co/api/starships/10/'],
'title': 'The Force Awakens',
'url': 'https://swapi.co/api/films/7/',
'vehicles': []}],
'gender': 'male',
'hair_color': 'blond',
'height': 172,
'homeworld': {'climate': 'arid',
'created': '2014-12-09T13:50:49.641000Z',
'diameter': 10465,
'edited': '2014-12-21T20:48:04.175778Z',
'films': ['https://swapi.co/api/films/5/',
'https://swapi.co/api/films/4/',
'https://swapi.co/api/films/6/',
'https://swapi.co/api/films/3/',
'https://swapi.co/api/films/1/'],
'gravity': '1 standard',
'name': 'Tatooine',
'orbital_period': 304,
'population': 200000,
'residents': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/2/',
'https://swapi.co/api/people/4/',
'https://swapi.co/api/people/6/',
'https://swapi.co/api/people/7/',
'https://swapi.co/api/people/8/',
'https://swapi.co/api/people/9/',
'https://swapi.co/api/people/11/',
'https://swapi.co/api/people/43/',
'https://swapi.co/api/people/62/'],
'rotation_period': 23,
'surface_water': 1,
'terrain': 'desert',
'url': 'https://swapi.co/api/planets/1/'},
'mass': 77,
'name': 'Luke Skywalker',
'skin_color': 'fair',
'species': [{'average_height': 180,
'average_lifespan': 120,
'classification': 'mammal',
'created': '2014-12-10T13:52:11.567000Z',
'designation': 'sentient',
'edited': '2015-04-17T06:59:55.850671Z',
'eye_colors': 'brown, blue, green, hazel, grey, amber',
'films': ['https://swapi.co/api/films/2/',
'https://swapi.co/api/films/7/',
'https://swapi.co/api/films/5/',
'https://swapi.co/api/films/4/',
'https://swapi.co/api/films/6/',
'https://swapi.co/api/films/3/',
'https://swapi.co/api/films/1/'],
'hair_colors': 'blonde, brown, black, red',
'homeworld': 'https://swapi.co/api/planets/9/',
'language': 'Galactic Basic',
'name': 'Human',
'people': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/4/',
'https://swapi.co/api/people/5/',
'https://swapi.co/api/people/6/',
'https://swapi.co/api/people/7/',
'https://swapi.co/api/people/9/',
'https://swapi.co/api/people/10/',
'https://swapi.co/api/people/11/',
'https://swapi.co/api/people/12/',
'https://swapi.co/api/people/14/',
'https://swapi.co/api/people/18/',
'https://swapi.co/api/people/19/',
'https://swapi.co/api/people/21/',
'https://swapi.co/api/people/22/',
'https://swapi.co/api/people/25/',
'https://swapi.co/api/people/26/',
'https://swapi.co/api/people/28/',
'https://swapi.co/api/people/29/',
'https://swapi.co/api/people/32/',
'https://swapi.co/api/people/34/',
'https://swapi.co/api/people/43/',
'https://swapi.co/api/people/51/',
'https://swapi.co/api/people/60/',
'https://swapi.co/api/people/61/',
'https://swapi.co/api/people/62/',
'https://swapi.co/api/people/66/',
'https://swapi.co/api/people/67/',
'https://swapi.co/api/people/68/',
'https://swapi.co/api/people/69/',
'https://swapi.co/api/people/74/',
'https://swapi.co/api/people/81/',
'https://swapi.co/api/people/84/',
'https://swapi.co/api/people/85/',
'https://swapi.co/api/people/86/',
'https://swapi.co/api/people/35/'],
'skin_colors': 'caucasian, black, asian, hispanic',
'url': 'https://swapi.co/api/species/1/'}],
'starships': [{'MGLT': 100,
'cargo_capacity': 110,
'consumables': '1 week',
'cost_in_credits': 149999,
'created': '2014-12-12T11:19:05.340000Z',
'crew': 1,
'edited': '2014-12-22T17:35:44.491233Z',
'films': ['https://swapi.co/api/films/2/',
'https://swapi.co/api/films/3/',
'https://swapi.co/api/films/1/'],
'hyperdrive_rating': 1.0,
'length': 12.5,
'manufacturer': 'Incom Corporation',
'max_atmosphering_speed': 1050,
'model': 'T-65 X-wing',
'name': 'X-wing',
'passengers': 0,
'pilots': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/9/',
'https://swapi.co/api/people/18/',
'https://swapi.co/api/people/19/'],
'starship_class': 'Starfighter',
'url': 'https://swapi.co/api/starships/12/'},
{'MGLT': 50,
'cargo_capacity': 80000,
'consumables': '2 months',
'cost_in_credits': 240000,
'created': '2014-12-15T13:04:47.235000Z',
'crew': 6,
'edited': '2014-12-22T17:35:44.795405Z',
'films': ['https://swapi.co/api/films/2/',
'https://swapi.co/api/films/3/'],
'hyperdrive_rating': 1.0,
'length': 20,
'manufacturer': 'Sienar Fleet Systems',
'max_atmosphering_speed': 850,
'model': 'Lambda-class T-4a shuttle',
'name': 'Imperial shuttle',
'passengers': 20,
'pilots': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/13/',
'https://swapi.co/api/people/14/'],
'starship_class': 'Armed government transport',
'url': 'https://swapi.co/api/starships/22/'}],
'url': {'birth_year': '19BBY',
'created': '2014-12-09T13:50:51.644000Z',
'edited': '2014-12-20T21:17:56.891000Z',
'eye_color': 'blue',
'films': ['https://swapi.co/api/films/2/',
'https://swapi.co/api/films/6/',
'https://swapi.co/api/films/3/',
'https://swapi.co/api/films/1/',
'https://swapi.co/api/films/7/'],
'gender': 'male',
'hair_color': 'blond',
'height': 172,
'homeworld': 'https://swapi.co/api/planets/1/',
'mass': 77,
'name': 'Luke Skywalker',
'skin_color': 'fair',
'species': ['https://swapi.co/api/species/1/'],
'starships': ['https://swapi.co/api/starships/12/',
'https://swapi.co/api/starships/22/'],
'url': 'https://swapi.co/api/people/1/',
'vehicles': ['https://swapi.co/api/vehicles/14/',
'https://swapi.co/api/vehicles/30/']},
'vehicles': [{'cargo_capacity': 10,
'consumables': 'none',
'cost_in_credits': 'unknown',
'created': '2014-12-15T12:22:12Z',
'crew': 2,
'edited': '2014-12-22T18:21:15.623033Z',
'films': ['https://swapi.co/api/films/2/'],
'length': 4.5,
'manufacturer': 'Incom corporation',
'max_atmosphering_speed': 650,
'model': 't-47 airspeeder',
'name': 'Snowspeeder',
'passengers': 0,
'pilots': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/18/'],
'url': 'https://swapi.co/api/vehicles/14/',
'vehicle_class': 'airspeeder'},
{'cargo_capacity': 4,
'consumables': '1 day',
'cost_in_credits': 8000,
'created': '2014-12-18T11:20:04.625000Z',
'crew': 1,
'edited': '2014-12-22T18:21:15.920537Z',
'films': ['https://swapi.co/api/films/3/'],
'length': 3,
'manufacturer': 'Aratech Repulsor Company',
'max_atmosphering_speed': 360,
'model': '74-Z speeder bike',
'name': 'Imperial Speeder Bike',
'passengers': 1,
'pilots': ['https://swapi.co/api/people/1/',
'https://swapi.co/api/people/5/'],
'url': 'https://swapi.co/api/vehicles/30/',
'vehicle_class': 'speeder'}]}
Insertion¶
Single inserts¶
In [18]:
result = people.insert_one(data)
In [19]:
db.list_collection_names()
Out[19]:
['people']
Bulk inserts¶
In [20]:
xs = [to_numeric(convert(requests.get('https://swapi.co/api/people/%i' % i).json()))
for i in range(2, 11)]
In [21]:
result = people.insert_many(xs)
In [22]:
result.inserted_ids
Out[22]:
[ObjectId('5b8fc6062b84ad7967ef9949'),
ObjectId('5b8fc6062b84ad7967ef994a'),
ObjectId('5b8fc6062b84ad7967ef994b'),
ObjectId('5b8fc6062b84ad7967ef994c'),
ObjectId('5b8fc6062b84ad7967ef994d'),
ObjectId('5b8fc6062b84ad7967ef994e'),
ObjectId('5b8fc6062b84ad7967ef994f'),
ObjectId('5b8fc6062b84ad7967ef9950'),
ObjectId('5b8fc6062b84ad7967ef9951')]
Queries¶
In [23]:
people.find_one(
# search criteria
{'name': 'Luke Skywalker'},
# values to return
{'name': True,
'hair_color': True,
'skin_color': True,
'eye_color': True
}
)
Out[23]:
{'_id': ObjectId('5b8fc5ee2b84ad7967ef9948'),
'eye_color': 'blue',
'hair_color': 'blond',
'name': 'Luke Skywalker',
'skin_color': 'fair'}
Using object ID¶
Note that ObjectID is NOT a string. You must convert a string to ObjectID before use.
In [24]:
result.inserted_ids[0]
Out[24]:
ObjectId('5b8fc6062b84ad7967ef9949')
In [25]:
people.find_one(
result.inserted_ids[0],
{'name': True, 'hair_color': True, 'skin_color': True, 'eye_color': True}
)
Out[25]:
{'_id': ObjectId('5b8fc6062b84ad7967ef9949'),
'eye_color': 'yellow',
'hair_color': 'n/a',
'name': 'C-3PO',
'skin_color': 'gold'}
Bulk queries¶
In [26]:
for person in people.find({'gender': 'male'}):
print(person['name'])
Luke Skywalker
Darth Vader
Owen Lars
Biggs Darklighter
Obi-Wan Kenobi
In [27]:
for x in people.find(
{'gender': 'male'},
{
'name': True,
'gender': True
}
):
print(x)
{'name': 'Luke Skywalker', 'gender': 'male', '_id': ObjectId('5b8fc5ee2b84ad7967ef9948')}
{'name': 'Darth Vader', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef994b')}
{'name': 'Owen Lars', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef994d')}
{'name': 'Biggs Darklighter', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef9950')}
{'name': 'Obi-Wan Kenobi', 'gender': 'male', '_id': ObjectId('5b8fc6062b84ad7967ef9951')}
Using regex search¶
In [28]:
for x in people.find(
{
'name': {'$regex': '^L'},
},
{
'name': True,
'gender': True,
'_id': False
}
):
pprint(x)
{'gender': 'male', 'name': 'Luke Skywalker'}
{'gender': 'female', 'name': 'Leia Organa'}
Alternative using Python regular expressions.
In [29]:
import re
name_pat = re.compile(r'^l', re.IGNORECASE)
In [30]:
for x in people.find(
{
'name': name_pat,
},
{
'name': True,
'gender': True,
'_id': False
}
):
pprint(x)
{'gender': 'male', 'name': 'Luke Skywalker'}
{'gender': 'female', 'name': 'Leia Organa'}
Using relational operators¶
In [31]:
for x in people.find(
{
'mass': {'$gt': 100},
},
{
'name': True,
'mass': True,
'_id': False
}
):
pprint(x)
{'mass': 136, 'name': 'Darth Vader'}
{'mass': 120, 'name': 'Owen Lars'}
In [32]:
mass_range = {'$lt': 100, '$gt': 50}
In [34]:
for x in people.find(
{
'mass': mass_range,
},
{
'name': True,
'mass': True,
'_id': False
}
):
print(x)
{'mass': 77, 'name': 'Luke Skywalker'}
{'mass': 75, 'name': 'C-3PO'}
{'mass': 75, 'name': 'Beru Whitesun lars'}
{'mass': 84, 'name': 'Biggs Darklighter'}
{'mass': 77, 'name': 'Obi-Wan Kenobi'}
In [35]:
for x in people.find(
{
'mass': mass_range,
},
{
'name': True,
'mass': True,
'_id': False
}
):
pprint(x)
{'mass': 77, 'name': 'Luke Skywalker'}
{'mass': 75, 'name': 'C-3PO'}
{'mass': 75, 'name': 'Beru Whitesun lars'}
{'mass': 84, 'name': 'Biggs Darklighter'}
{'mass': 77, 'name': 'Obi-Wan Kenobi'}
Nested search¶
In [36]:
for x in people.find(
{
'species.name': 'Human',
},
{
'name': True,
'species.name': True,
'_id': False
}
):
pprint(x)
{'name': 'Luke Skywalker', 'species': [{'name': 'Human'}]}
{'name': 'Darth Vader', 'species': [{'name': 'Human'}]}
{'name': 'Leia Organa', 'species': [{'name': 'Human'}]}
{'name': 'Owen Lars', 'species': [{'name': 'Human'}]}
{'name': 'Beru Whitesun lars', 'species': [{'name': 'Human'}]}
{'name': 'Biggs Darklighter', 'species': [{'name': 'Human'}]}
{'name': 'Obi-Wan Kenobi', 'species': [{'name': 'Human'}]}
Matching multiple criteria¶
In [37]:
for x in people.find(
{
'starships.cost_in_credits': {'$lt': 250000},
'starships.max_atmosphering_speed': {'$gt': 500},
'starships.passengers': {'$gt': 0}
},
{
'name': True,
'starship.name': True,
'starships.max_atmosphering_speed': True,
'starships.passengers': True,
'starships.cost_in_credits': True,
'_id': False
}
):
pprint(x)
{'name': 'Luke Skywalker',
'starships': [{'cost_in_credits': 149999,
'max_atmosphering_speed': 1050,
'passengers': 0},
{'cost_in_credits': 240000,
'max_atmosphering_speed': 850,
'passengers': 20}]}
{'name': 'Obi-Wan Kenobi',
'starships': [{'cost_in_credits': 180000,
'max_atmosphering_speed': 1150,
'passengers': 0},
{'cost_in_credits': 125000000,
'max_atmosphering_speed': 1050,
'passengers': 48247},
{'cost_in_credits': 'unknown',
'max_atmosphering_speed': 1050,
'passengers': 3},
{'cost_in_credits': 320000,
'max_atmosphering_speed': 1500,
'passengers': 0},
{'cost_in_credits': 168000,
'max_atmosphering_speed': 1100,
'passengers': 0}]}
Matching multiple criteria simultaneously¶
In [38]:
for x in people.find(
{
'starships': {
'$elemMatch': {
'cost_in_credits': {'$lt': 250000},
'max_atmosphering_speed': {'$gt': 500},
'passengers': {'$gt': 1}
}
}
},
{
'name': True,
'starship.name': True,
'starships.max_atmosphering_speed': True,
'starships.passengers': True,
'starships.cost_in_credits': True,
'_id': False
}
):
pprint(x)
{'name': 'Luke Skywalker',
'starships': [{'cost_in_credits': 149999,
'max_atmosphering_speed': 1050,
'passengers': 0},
{'cost_in_credits': 240000,
'max_atmosphering_speed': 850,
'passengers': 20}]}
Indexes¶
In [39]:
people.find({}).explain
Out[39]:
<bound method Cursor.explain of <pymongo.cursor.Cursor object at 0x104c2c9e8>>
In [40]:
people.find({'name': 'Luke Skywalker'}).explain()
Out[40]:
{'executionStats': {'allPlansExecution': [],
'executionStages': {'advanced': 1,
'direction': 'forward',
'docsExamined': 10,
'executionTimeMillisEstimate': 0,
'filter': {'name': {'$eq': 'Luke Skywalker'}},
'invalidates': 0,
'isEOF': 1,
'nReturned': 1,
'needTime': 10,
'needYield': 0,
'restoreState': 0,
'saveState': 0,
'stage': 'COLLSCAN',
'works': 12},
'executionSuccess': True,
'executionTimeMillis': 0,
'nReturned': 1,
'totalDocsExamined': 10,
'totalKeysExamined': 0},
'ok': 1.0,
'queryPlanner': {'indexFilterSet': False,
'namespace': 'starwars.people',
'parsedQuery': {'name': {'$eq': 'Luke Skywalker'}},
'plannerVersion': 1,
'rejectedPlans': [],
'winningPlan': {'direction': 'forward',
'filter': {'name': {'$eq': 'Luke Skywalker'}},
'stage': 'COLLSCAN'}},
'serverInfo': {'gitVersion': 'fc1573ba18aee42f97a3bb13b67af7d837826b47',
'host': 'eris',
'port': 27017,
'version': '4.0.2'}}
In [41]:
people.create_index('name')
Out[41]:
'name_1'
In [42]:
people.find({'name': 'Luke Skywalker'}).explain()
Out[42]:
{'executionStats': {'allPlansExecution': [],
'executionStages': {'advanced': 1,
'alreadyHasObj': 0,
'docsExamined': 1,
'executionTimeMillisEstimate': 0,
'inputStage': {'advanced': 1,
'direction': 'forward',
'dupsDropped': 0,
'dupsTested': 0,
'executionTimeMillisEstimate': 0,
'indexBounds': {'name': ['["Luke Skywalker", "Luke Skywalker"]']},
'indexName': 'name_1',
'indexVersion': 2,
'invalidates': 0,
'isEOF': 1,
'isMultiKey': False,
'isPartial': False,
'isSparse': False,
'isUnique': False,
'keyPattern': {'name': 1},
'keysExamined': 1,
'multiKeyPaths': {'name': []},
'nReturned': 1,
'needTime': 0,
'needYield': 0,
'restoreState': 0,
'saveState': 0,
'seeks': 1,
'seenInvalidated': 0,
'stage': 'IXSCAN',
'works': 2},
'invalidates': 0,
'isEOF': 1,
'nReturned': 1,
'needTime': 0,
'needYield': 0,
'restoreState': 0,
'saveState': 0,
'stage': 'FETCH',
'works': 2},
'executionSuccess': True,
'executionTimeMillis': 3,
'nReturned': 1,
'totalDocsExamined': 1,
'totalKeysExamined': 1},
'ok': 1.0,
'queryPlanner': {'indexFilterSet': False,
'namespace': 'starwars.people',
'parsedQuery': {'name': {'$eq': 'Luke Skywalker'}},
'plannerVersion': 1,
'rejectedPlans': [],
'winningPlan': {'inputStage': {'direction': 'forward',
'indexBounds': {'name': ['["Luke Skywalker", "Luke Skywalker"]']},
'indexName': 'name_1',
'indexVersion': 2,
'isMultiKey': False,
'isPartial': False,
'isSparse': False,
'isUnique': False,
'keyPattern': {'name': 1},
'multiKeyPaths': {'name': []},
'stage': 'IXSCAN'},
'stage': 'FETCH'}},
'serverInfo': {'gitVersion': 'fc1573ba18aee42f97a3bb13b67af7d837826b47',
'host': 'eris',
'port': 27017,
'version': '4.0.2'}}
Aggregate Queries¶
In [43]:
people.count_documents({'species.name': 'Human'})
Out[43]:
7
Using aggregate¶
The aggregate
function runs a pipeline of commands, and uses the
$group
operator to summarize results.
In [44]:
from collections import OrderedDict
Filter and count
In [45]:
cmds = [
{'$match': {'species.name': 'Human'}},
{'$group': {'_id': '$species.name', 'count': {'$sum': 1}}},
]
In [46]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Human'], 'count': 7}]
Filter and find total mass
In [47]:
cmds = [
{'$match': {'species.name': 'Human'}},
{'$group': {'_id': '$species.name', 'total_mass': {'$sum': '$mass'}}},
]
In [48]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Human'], 'total_mass': 618}]
Total mass of each species
In [49]:
cmds = [
{'$group': {'_id': '$species.name', 'total_mass': {'$sum': '$mass'}}},
]
In [50]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Droid'], 'total_mass': 139}, {'_id': ['Human'], 'total_mass': 618}]
Total and average mass of each species
In [51]:
cmds = [
{
'$group': {
'_id': '$species.name',
'total_mass': {'$sum': '$mass'},
'avg_mass': {'$avg': '$mass'}
}
},
]
In [52]:
pprint(list(people.aggregate(cmds)))
[{'_id': ['Droid'], 'avg_mass': 46.333333333333336, 'total_mass': 139},
{'_id': ['Human'], 'avg_mass': 88.28571428571429, 'total_mass': 618}]
Using MapReduce¶
With MapReduce
you get the full power of JavaScript, but it is more
complex and often less efficient. You should use aggregate
in
preference to map_reduce
in most cases.
- In the map stage, you create a (key, value) pair
- In the reduce stage, you perform a reduction (e.g. sum) of the values associated with each key
In [53]:
from bson.code import Code
Count the number by eye_color.
In [54]:
mapper = Code('''
function() {
emit(this.eye_color, 1);
}
''')
reducer = Code('''
function (key, values) {
var total = 0;
for (var i = 0; i < values.length; i++) {
total += values[i];
}
return total;
}
''')
result = people.map_reduce(
mapper,
reducer,
'result1'
)
In [55]:
for doc in result.find():
pprint(doc)
{'_id': 'blue', 'value': 3.0}
{'_id': 'blue-gray', 'value': 1.0}
{'_id': 'brown', 'value': 2.0}
{'_id': 'red', 'value': 2.0}
{'_id': 'yellow', 'value': 2.0}
The output is also stored in the result1
collection we specified.
In [56]:
list(db.result1.find())
Out[56]:
[{'_id': 'blue', 'value': 3.0},
{'_id': 'blue-gray', 'value': 1.0},
{'_id': 'brown', 'value': 2.0},
{'_id': 'red', 'value': 2.0},
{'_id': 'yellow', 'value': 2.0}]
Using JavaScript Array functions to simplify code.
In [57]:
mapper = Code('''
function() {
emit(this.eye_color, 1);
}
''')
reducer = Code('''
function (key, values) {
return Array.sum(values);
}
''')
result = people.map_reduce(
mapper,
reducer,
'result2'
)
In [58]:
for doc in result.find():
pprint(doc)
{'_id': 'blue', 'value': 3.0}
{'_id': 'blue-gray', 'value': 1.0}
{'_id': 'brown', 'value': 2.0}
{'_id': 'red', 'value': 2.0}
{'_id': 'yellow', 'value': 2.0}
Find avergae mass by gender.
In [59]:
mapper = Code('''
function() {
emit(this.gender, this.mass);
}
''')
reducer = Code('''
function (key, values) {
return Array.avg(values);
}
''')
result = people.map_reduce(
mapper,
reducer,
'result3'
)
In [60]:
for doc in result.find():
pprint(doc)
{'_id': 'female', 'value': 62.0}
{'_id': 'male', 'value': 98.8}
{'_id': 'n/a', 'value': 46.333333333333336}
Count number of members in each species
In [61]:
mapper = Code('''
function() {
this.species.map(function(z) {
emit(z.name, 1);
})
}
''')
reducer = Code('''
function (key, values) {
return Array.sum(values);
}
''')
result = people.map_reduce(
mapper,
reducer,
'result3'
)
In [62]:
for doc in result.find():
pprint(doc)
{'_id': 'Droid', 'value': 3.0}
{'_id': 'Human', 'value': 7.0}
Geospatial queries¶
In [63]:
crime = db.crime
In [64]:
import json
In [65]:
path = 'data/crime-mapping.geojson'
with open(path) as f:
datastore = json.load(f)
In [66]:
results = crime.insert_many(datastore['features'])
In [67]:
crime.find_one({})
Out[67]:
{'_id': ObjectId('5b8fc6472b84ad7967ef9952'),
'geometry': {'coordinates': [-78.78200313, 35.760212065], 'type': 'Point'},
'properties': {'activity_date': None,
'apartment_complex': None,
'beat_number': '112',
'chrgcnt': None,
'crime_category': 'ALL OTHER',
'crime_type': 'ALL OTHER - ESCAPE FROM CUSTODY OR RESIST ARREST',
'crimeday': 'THURSDAY',
'date_from': '2017-11-30',
'date_to': '11/30/2017',
'district': 'D3',
'domestic': 'N',
'incident_number': '17010528',
'lat': 35.760212065,
'location_category': 'TOWN OWNED',
'lon': -78.78200313,
'map_reference': 'P027',
'offensecategory': 'All Other Offenses',
'period': ['Everything', 'Last Year'],
'phxcommunity': 'No',
'phxrecordstatus': None,
'phxstatus': None,
'radio': 'Everything,Last Year',
'record': 3145,
'residential_subdivision': 'SHOPPES OF KILDAIRE',
'street': 'KILDAIRE FARM RD',
'time_from': -62135553600,
'time_to': -62135553600,
'timeframe': ['Last Year'],
'ucr': '2650',
'violentproperty': 'All Other'},
'type': 'Feature'}
In [68]:
crime.find_one({},
{
'geometry': 1,
'_id': 0,
}
)
Out[68]:
{'geometry': {'coordinates': [-78.78200313, 35.760212065], 'type': 'Point'}}
In [69]:
crime.create_index([('geometry', GEOSPHERE)])
Out[69]:
'geometry_2dsphere'
List 5 crimes near the location
In [70]:
loc = SON([('type', 'Point'), ('coordinates', [-78.78200313, 35.760212065])])
for doc in crime.find(
{
'geometry' : SON([('$near', {'$geometry' : loc})])
},
{
'_id': 0,
'properties.crime_type': 1,
'properties.date_from': 1
}
).limit(5):
pprint(doc)
{'properties': {'crime_type': 'ALL OTHER - ESCAPE FROM CUSTODY OR RESIST '
'ARREST',
'date_from': '2017-11-30'}}
{'properties': {'crime_type': 'LARCENY - AUTO PARTS OR ACCESSORIES',
'date_from': '2018-03-20'}}
{'properties': {'crime_type': 'COUNTERFEITING - USING',
'date_from': '2018-08-05'}}
{'properties': {'crime_type': 'DRUGS - DRUG VIOLATIONS '
'(POSS./SELL/MAN./DEL./TRNSPRT/CULT.)',
'date_from': '2017-11-30'}}
{'properties': {'crime_type': 'VANDALISM - DAMAGE TO PROPERTY',
'date_from': '2018-03-26'}}
List crimes committed nearby (within 200 m)
In [71]:
loc = SON([('type', 'Point'), ('coordinates', [-78.78200313, 35.760212065])])
for doc in crime.find(
{
'geometry' : SON([('$geoNear', {'$geometry' : loc, '$minDistance': 1e-6, '$maxDistance': 200})]),
},
{
'_id': 0,
'geometry.coordinates': 1,
'properties.crime_type': 1,
'properties.date_from': 1
}
):
pprint(doc)
{'geometry': {'coordinates': [-78.78102423, 35.7607323]},
'properties': {'crime_type': 'ASSAULT - SIMPLE - ALL OTHER',
'date_from': '2018-02-14'}}
{'geometry': {'coordinates': [-78.78131931, 35.761138061]},
'properties': {'crime_type': 'VANDALISM - GRAFFITI',
'date_from': '2018-07-20'}}
{'geometry': {'coordinates': [-78.7827814, 35.759087052]},
'properties': {'crime_type': 'VANDALISM - GRAFFITI',
'date_from': '2018-07-29'}}
In [ ]: