{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" major | \n",
" state_abbrev | \n",
" state_name | \n",
" country | \n",
" course_id | \n",
" course_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" [(stats, Statistics), (biol, Biology)] | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" [(stat, Statistics), (math, Mathematics)] | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" [(stat, Statistics), (math, Mathematics)] | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last major \\\n",
"0 1 ann ang [(stats, Statistics), (biol, Biology)] \n",
"1 2 barry bonds [(stat, Statistics), (math, Mathematics)] \n",
"2 2 barry bonds [(stat, Statistics), (math, Mathematics)] \n",
"\n",
" state_abbrev state_name country course_id course_name \n",
"0 NC North Carolina USA BIOS101 Frogs in medicine \n",
"1 QC Quebec Canada BIOS101 Frogs in medicine \n",
"2 QC Quebec Canada BIOS102 Medicine for frogs "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(\n",
" dict(\n",
" student_id = [1,2,2],\n",
" first = ['ann', 'barry', 'barry'],\n",
" last = ['ang', 'bonds', 'bonds'],\n",
" major = [[('stats', 'Statistics'), ('biol', 'Biology')], \n",
" [('stat', 'Statistics'),('math', 'Mathematics')],\n",
" [('stat', 'Statistics'),('math', 'Mathematics')]],\n",
" state_abbrev = ['NC', 'QC', 'QC'],\n",
" state_name = ['North Carolina', 'Quebec', 'Quebec'],\n",
" country = ['USA', 'Canada', 'Canada'],\n",
" course_id = ['BIOS101', 'BIOS101', 'BIOS102'],\n",
" course_name = [\"Frogs in medicine\", \n",
" \"Frogs in medicine\", \n",
" \"Medicine for frogs\"]\n",
" )\n",
")\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Normalization\n",
"\n",
"- Every table should not have any \n",
" - duplication\n",
" - dependencies that are not key or domain constraints"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1NF\n",
"\n",
"- Split composite entries"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" major | \n",
" state_abbrev | \n",
" state_name | \n",
" country | \n",
" course_id | \n",
" course_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" (stats, Statistics) | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" (biol, Biology) | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" (stat, Statistics) | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" (math, Mathematics) | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" (stat, Statistics) | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" (math, Mathematics) | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last major state_abbrev state_name \\\n",
"0 1 ann ang (stats, Statistics) NC North Carolina \n",
"0 1 ann ang (biol, Biology) NC North Carolina \n",
"1 2 barry bonds (stat, Statistics) QC Quebec \n",
"1 2 barry bonds (math, Mathematics) QC Quebec \n",
"2 2 barry bonds (stat, Statistics) QC Quebec \n",
"2 2 barry bonds (math, Mathematics) QC Quebec \n",
"\n",
" country course_id course_name \n",
"0 USA BIOS101 Frogs in medicine \n",
"0 USA BIOS101 Frogs in medicine \n",
"1 Canada BIOS101 Frogs in medicine \n",
"1 Canada BIOS101 Frogs in medicine \n",
"2 Canada BIOS102 Medicine for frogs \n",
"2 Canada BIOS102 Medicine for frogs "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = df.explode('major')\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" state_abbrev | \n",
" state_name | \n",
" country | \n",
" course_id | \n",
" course_name | \n",
" major_id | \n",
" major_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" stats | \n",
" Statistics | \n",
"
\n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" biol | \n",
" Biology | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" stat | \n",
" Statistics | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" math | \n",
" Mathematics | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
" stat | \n",
" Statistics | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
" math | \n",
" Mathematics | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last state_abbrev state_name country course_id \\\n",
"0 1 ann ang NC North Carolina USA BIOS101 \n",
"0 1 ann ang NC North Carolina USA BIOS101 \n",
"1 2 barry bonds QC Quebec Canada BIOS101 \n",
"1 2 barry bonds QC Quebec Canada BIOS101 \n",
"2 2 barry bonds QC Quebec Canada BIOS102 \n",
"2 2 barry bonds QC Quebec Canada BIOS102 \n",
"\n",
" course_name major_id major_name \n",
"0 Frogs in medicine stats Statistics \n",
"0 Frogs in medicine biol Biology \n",
"1 Frogs in medicine stat Statistics \n",
"1 Frogs in medicine math Mathematics \n",
"2 Medicine for frogs stat Statistics \n",
"2 Medicine for frogs math Mathematics "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_cols = ['major_id', 'major_name']\n",
"for i, c in enumerate(new_cols):\n",
" df1[c] = df1.major.apply(lambda x: x[i])\n",
"df1 = df1.drop('major', axis=1)\n",
"df1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2NF\n",
"\n",
"- Break partial dependencies\n",
" - Identify candidate PK for each row\n",
" - If there is a composite PK, see if other columns have partial dependencies"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" state_abbrev | \n",
" state_name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last state_abbrev state_name country\n",
"0 1 ann ang NC North Carolina USA\n",
"1 2 barry bonds QC Quebec Canada"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student = df1.iloc[:, [0,1,2,3,4,5]].drop_duplicates()\n",
"df_student"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" course_id | \n",
" course_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 2 | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" course_id course_name\n",
"0 BIOS101 Frogs in medicine\n",
"2 BIOS102 Medicine for frogs"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_course = df1.iloc[:,6:8].drop_duplicates()\n",
"df_course"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" major_id | \n",
" major_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" stats | \n",
" Statistics | \n",
"
\n",
" \n",
" 0 | \n",
" biol | \n",
" Biology | \n",
"
\n",
" \n",
" 1 | \n",
" stat | \n",
" Statistics | \n",
"
\n",
" \n",
" 1 | \n",
" math | \n",
" Mathematics | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" major_id major_name\n",
"0 stats Statistics\n",
"0 biol Biology\n",
"1 stat Statistics\n",
"1 math Mathematics"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_major = df1.iloc[:,8:10].drop_duplicates()\n",
"df_major"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" course_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" BIOS101 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" BIOS101 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" BIOS102 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id course_id\n",
"0 1 BIOS101\n",
"1 2 BIOS101\n",
"2 2 BIOS102"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student_course = df1.iloc[:, [0, 6]].drop_duplicates()\n",
"df_student_course"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" major_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" stats | \n",
"
\n",
" \n",
" 0 | \n",
" 1 | \n",
" biol | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" stat | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" math | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id major_id\n",
"0 1 stats\n",
"0 1 biol\n",
"1 2 stat\n",
"1 2 math"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student_major = df1.iloc[:, [0, 8]].drop_duplicates()\n",
"df_student_major"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3NF\n",
"\n",
"- Remove transitive dependencies"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" state_abbrev | \n",
" state_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last state_abbrev state_name\n",
"0 1 ann ang NC North Carolina\n",
"1 2 barry bonds QC Quebec"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student_ = df_student.iloc[:, :5].drop_duplicates()\n",
"df_student_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state_name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" North Carolina | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" Quebec | \n",
" Canada | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state_name country\n",
"0 North Carolina USA\n",
"1 Quebec Canada"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_origin = df_student.iloc[:, 4:7].drop_duplicates()\n",
"df_origin"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Final tables"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" state_abbrev | \n",
" state_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last state_abbrev state_name\n",
"0 1 ann ang NC North Carolina\n",
"1 2 barry bonds QC Quebec"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student_"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state_name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" North Carolina | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" Quebec | \n",
" Canada | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state_name country\n",
"0 North Carolina USA\n",
"1 Quebec Canada"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_origin"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" course_id | \n",
" course_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
"
\n",
" \n",
" 2 | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" course_id course_name\n",
"0 BIOS101 Frogs in medicine\n",
"2 BIOS102 Medicine for frogs"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_course"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" course_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" BIOS101 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" BIOS101 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" BIOS102 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id course_id\n",
"0 1 BIOS101\n",
"1 2 BIOS101\n",
"2 2 BIOS102"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student_course"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" major_id | \n",
" major_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" stats | \n",
" Statistics | \n",
"
\n",
" \n",
" 0 | \n",
" biol | \n",
" Biology | \n",
"
\n",
" \n",
" 1 | \n",
" stat | \n",
" Statistics | \n",
"
\n",
" \n",
" 1 | \n",
" math | \n",
" Mathematics | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" major_id major_name\n",
"0 stats Statistics\n",
"0 biol Biology\n",
"1 stat Statistics\n",
"1 math Mathematics"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_major"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" major_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" stats | \n",
"
\n",
" \n",
" 0 | \n",
" 1 | \n",
" biol | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" stat | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" math | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id major_id\n",
"0 1 stats\n",
"0 1 biol\n",
"1 2 stat\n",
"1 2 math"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student_major"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Denormalization"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" state_abbrev | \n",
" state_name | \n",
" country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last state_abbrev state_name country\n",
"0 1 ann ang NC North Carolina USA\n",
"1 2 barry bonds QC Quebec Canada"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_student_.merge(df_origin)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" student_id | \n",
" first | \n",
" last | \n",
" state_abbrev | \n",
" state_name | \n",
" country | \n",
" course_id | \n",
" course_name | \n",
" major_id | \n",
" major_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" stats | \n",
" Statistics | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" ann | \n",
" ang | \n",
" NC | \n",
" North Carolina | \n",
" USA | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" biol | \n",
" Biology | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" stat | \n",
" Statistics | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
" stat | \n",
" Statistics | \n",
"
\n",
" \n",
" 4 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS101 | \n",
" Frogs in medicine | \n",
" math | \n",
" Mathematics | \n",
"
\n",
" \n",
" 5 | \n",
" 2 | \n",
" barry | \n",
" bonds | \n",
" QC | \n",
" Quebec | \n",
" Canada | \n",
" BIOS102 | \n",
" Medicine for frogs | \n",
" math | \n",
" Mathematics | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" student_id first last state_abbrev state_name country course_id \\\n",
"0 1 ann ang NC North Carolina USA BIOS101 \n",
"1 1 ann ang NC North Carolina USA BIOS101 \n",
"2 2 barry bonds QC Quebec Canada BIOS101 \n",
"3 2 barry bonds QC Quebec Canada BIOS102 \n",
"4 2 barry bonds QC Quebec Canada BIOS101 \n",
"5 2 barry bonds QC Quebec Canada BIOS102 \n",
"\n",
" course_name major_id major_name \n",
"0 Frogs in medicine stats Statistics \n",
"1 Frogs in medicine biol Biology \n",
"2 Frogs in medicine stat Statistics \n",
"3 Medicine for frogs stat Statistics \n",
"4 Frogs in medicine math Mathematics \n",
"5 Medicine for frogs math Mathematics "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" df_student_.\n",
" merge(df_origin).\n",
" merge(df_student_course).\n",
" merge(df_course).\n",
" merge(df_student_major).\n",
" merge(df_major)\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}