{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Processing data\n",
"\n",
"Many ML algorithms need data that is numeric, complete (no missing values), and standardized. Ensembles of trees are the most accommodating and require the least data processing."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Source location of the Titanic spreadsheet (titanic3.xls).\n",
"url = 'http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the spreadsheet once; keep one untouched frame and one working copy.\n",
"df_orig = pd.read_excel(url)\n",
"df = df_orig.copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basic inspection"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" survived | \n",
" name | \n",
" sex | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" ticket | \n",
" fare | \n",
" cabin | \n",
" embarked | \n",
" boat | \n",
" body | \n",
" home.dest | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1117 | \n",
" 3 | \n",
" 0 | \n",
" Pekoniemi, Mr. Edvard | \n",
" male | \n",
" 21.0 | \n",
" 0 | \n",
" 0 | \n",
" STON/O 2. 3101294 | \n",
" 7.9250 | \n",
" NaN | \n",
" S | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 412 | \n",
" 2 | \n",
" 0 | \n",
" Fynney, Mr. Joseph J | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 239865 | \n",
" 26.0000 | \n",
" NaN | \n",
" S | \n",
" NaN | \n",
" 322.0 | \n",
" Liverpool / Montreal, PQ | \n",
"
\n",
" \n",
" | 64 | \n",
" 1 | \n",
" 1 | \n",
" Chambers, Mr. Norman Campbell | \n",
" male | \n",
" 27.0 | \n",
" 1 | \n",
" 0 | \n",
" 113806 | \n",
" 53.1000 | \n",
" E8 | \n",
" S | \n",
" 5 | \n",
" NaN | \n",
" New York, NY / Ithaca, NY | \n",
"
\n",
" \n",
" | 650 | \n",
" 3 | \n",
" 0 | \n",
" Attalah, Miss. Malake | \n",
" female | \n",
" 17.0 | \n",
" 0 | \n",
" 0 | \n",
" 2627 | \n",
" 14.4583 | \n",
" NaN | \n",
" C | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 912 | \n",
" 3 | \n",
" 0 | \n",
" Karaic, Mr. Milan | \n",
" male | \n",
" 30.0 | \n",
" 0 | \n",
" 0 | \n",
" 349246 | \n",
" 7.8958 | \n",
" NaN | \n",
" S | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass survived name sex age sibsp \\\n",
"1117 3 0 Pekoniemi, Mr. Edvard male 21.0 0 \n",
"412 2 0 Fynney, Mr. Joseph J male 35.0 0 \n",
"64 1 1 Chambers, Mr. Norman Campbell male 27.0 1 \n",
"650 3 0 Attalah, Miss. Malake female 17.0 0 \n",
"912 3 0 Karaic, Mr. Milan male 30.0 0 \n",
"\n",
" parch ticket fare cabin embarked boat body \\\n",
"1117 0 STON/O 2. 3101294 7.9250 NaN S NaN NaN \n",
"412 0 239865 26.0000 NaN S NaN 322.0 \n",
"64 0 113806 53.1000 E8 S 5 NaN \n",
"650 0 2627 14.4583 NaN C NaN NaN \n",
"912 0 349246 7.8958 NaN S NaN NaN \n",
"\n",
" home.dest \n",
"1117 NaN \n",
"412 Liverpool / Montreal, PQ \n",
"64 New York, NY / Ithaca, NY \n",
"650 NaN \n",
"912 NaN "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sample(5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 1309 entries, 0 to 1308\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 pclass 1309 non-null int64 \n",
" 1 survived 1309 non-null int64 \n",
" 2 name 1309 non-null object \n",
" 3 sex 1309 non-null object \n",
" 4 age 1046 non-null float64\n",
" 5 sibsp 1309 non-null int64 \n",
" 6 parch 1309 non-null int64 \n",
" 7 ticket 1309 non-null object \n",
" 8 fare 1308 non-null float64\n",
" 9 cabin 295 non-null object \n",
" 10 embarked 1307 non-null object \n",
" 11 boat 486 non-null object \n",
" 12 body 121 non-null float64\n",
" 13 home.dest 745 non-null object \n",
"dtypes: float64(3), int64(4), object(7)\n",
"memory usage: 143.3+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Detailed inspection"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pandas_profiling as pp"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f795bdbdb3664dd2b1c60d9a8419305c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=28.0, style=ProgressStyle(descrip…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9be24b7caee241e4be22631c1f2c3141",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Generate report structure', max=1.0, style=ProgressStyle(…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "575dd299c8224a7c98f73af9644d9b09",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Render HTML', max=1.0, style=ProgressStyle(description_wi…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pp.ProfileReport(df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create new features\n",
"\n",
"Sometimes we want to create new features from existing columns. For example, the `name` column can be mined to extract titles. We illustrate how to do this, but will not use the resulting feature here."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Mr 757\n",
"Miss 260\n",
"Mrs 197\n",
"Master 61\n",
"Dr 8\n",
"Rev 8\n",
"Col 4\n",
"Major 2\n",
"Mlle 2\n",
"Ms 2\n",
"Don 1\n",
"Lady 1\n",
"Mme 1\n",
"Jonkheer 1\n",
"Sir 1\n",
"Capt 1\n",
"Dona 1\n",
"Countess 1\n",
"Name: title, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Extract a title from each name. The greedy leading `.*` makes the group\n",
"# capture the last uppercase-then-lowercase run directly followed by a period.\n",
"# The raw string keeps `\\.` a regex escape instead of an invalid string escape;\n",
"# expand=False returns a Series for the single capture group.\n",
"df['title'] = df.name.str.extract(r'.*([A-Z][a-z]+)\\..*', expand=False)\n",
"# Show how often each extracted title occurs.\n",
"df.title.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Drop features\n",
"\n",
"These features are either uninformative or leak information about the outcome."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Set the outcome column aside, then remove it and the other listed columns.\n",
"target = df['survived']\n",
"df = df.drop(\n",
"    columns=[\n",
"        'survived', 'name', 'ticket', 'cabin',\n",
"        'boat', 'body', 'home.dest', 'title',\n",
"    ]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Inspect for missing data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import missingno as mn"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABcQAAAKECAYAAAApNjlsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABmiUlEQVR4nO3deZxdd10//tcnM0mTlBZayg6yryoMS1kEwaKAiIIBRFAE+X2poCCOrAVEEagKlDKsomDZZN8UWQSRDiA7tJeyUyhQWsoSmmnSTNK5d+bz++PcCZM0bUPmZs7Nvc/n4zGPOXPuOZ++z4PL5Mzrfs77U2qtAQAAAACAUbeu7QIAAAAAAGAtCMQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEgEAcAAAAAYCwIxAEAAAAAGAsCcQAAAAAAxoJAHAAAAADGWCmltF0DrBWBOAAAAACMqVLK5iT/Wko5vu1aYC0IxAEAAABgfN0/ycOS/GMpZarlWuCQE4gDAAAAwJiqtb41yVOT3DDJTCnlti2XBIeUQBwAAAAAxlAp5YgkqbW+MslMkmsmeXEp5VfbrAsOJYE4AAAAAIyZUspErfWS/vbT0swQv0aSuyd5WSnl1m3WB4dKqbW2XQMAAAAA0IJSyluT3CnNDPGfJLlHkj9O8sUkf1Vr/VJ71cHgCcQBAAAAYAyVUn4zyTuTPDbJO2qtS/39T0zy9CRfTfL4WutX2qsSBkvLFAAAAAAYT1dLcuUkn621LpVSNiRJrfXUJK9N0z5lppRyfIs1wkAJxAEAAABgjJRSSn/z+0l2J7lXktRaF5ZD8SQvT9NC5bZJ/nZ5AU443E22XQAAAAAAcOj0F9BcXP65/ryH8veTnJfkUaWUM2utX+iH4iXJzZN8LckHkrxneQFOONzpIQ4AAAAAI2plGF5KuVuSa6eZ+f2tWusPSyl3SfK/ST6T5OW11neXUm6W5ClJrp7kD2qtCy2VDwMnEAcAAACAEVRKWbdiocy3JPm1NCH3fJLtSf5frfWjpZR7JHlrkqOSXJLk4v72CbXWL7VSPBwiAnEAAAAAGGGllFcl+e0kf5XkS0mun+QlSW6d5Na11q+UUm6Q5B5JfjXNDPJ311q/3U7FcOjoIQ4AAAAAI6qUcsM0Qfc/JPlwrXVXKWV9mlD8LUnOKaWUWuv3knyvtUJhjaxruwAAAAAAYDBKKRP77LpWmgUyv9gPw2+Zpl/4h5OcWGudT/K4Usq11rhUaIVAHAAAAABGQH+m9/ICmvft774gye4kNymlXC/JJ5N8JMmja63zpZRfS3K/NK1SYORpmQIAAAAAI6SUcmqSOyb5YJKdSc5K8owk10szM/xPkvRKKccmeUySK6XpLQ4jzwxxAAAAADgMlVKOLKU8tJRyTJLUWmv/peOSfLe/7ydJ/ibJL/df+49aazfJnZKckuT3kjy21vrjNS0eWiIQBwAAAIDD07OSvDnJH5VSrrJi/zWS/CxJSinraq0fSfI7SbYleXEp5cdJXpfk+CQn1Fq/upZFQ5u0TAEAAACAw9PfpWmD8sIk60opb6q1XpjkqCTd/jHrSimptX64lHK//vG3SfKFJN+stV7QRuHQFoE4AAAAABxmSikTtdZLSin/X3/XC/v7X53kp0kuSpJaa2/5nFrrN0op36y1/s+aFwxDQiAOAAAAAIeRfhi+2P/xhmkWxkyS5yc5OskdkhxbSrlGkokkG/tfE0nOLaU8o99HHMaOQBwAAAAADhOllLIchpdS/jvJ7iR/nuT/S5P1PSfN7PDJJLdIs4ZgTbKUpo3KvwvDGWcCcQAAAAA4DKycGV5KuUeaxTOnk/y01torpTwyyY4kf5rkX5K8aWX4vc/MchhLAnHGyvIv/lLKuiTr+v9YlFprbbs2AAAAgMuzIgz/iyS3TfL9JJ9f7hNea91dSnlckqskeVWSo0opb6+1/rg/xNLaVw3DZV3bBcBaKaVM9sPwI5P8Q5
LHllI2CsMBAACAw0Up5Z5JXp7k/knOr7XO9/evS5Ja6yVJ/ijJO5O8JMkDSiml/5oMhLEnEGcs9GeG90opRyX5VJK7pumb5R8CAAAA4LBRa/1okj9LclSSR5RSTujvX1oRfC8k+X9JXpvk44Jw+Lni/w+Mi1LKpiSfSHJhmv5aZ++7iEQpZV2t1eNDAAAAQOv2zSlW/lxKeVSSmSSfSfK3tdbP9vdrDQuXwwxxxsl9k2xO8qQk36y1dkspdy+lPLOU8i+llJv0P031/wsAAMZOKWWi/73s832izboAxlX/affl8PtGpZRbJrnR8uu11tcmeVqSOyT5+1LKnfr76/LvcODSBH+MrP388j8uyU2SfDfJtUspf5/kf5L8cZI/TPLxUsrVzRAHAGBclFLW97+XFevtvLiU8tokLyqlHLm8gBsAa6cfhi8voPmKJO9I8skk/1tK+f+Wj6u1virJM5Mcn+RvSil37e83Qxwuw2TbBcChsPwPR382y9G11m1J/i/J2Ul+mOScJNdOcmKS9yW5U5L3JLlzkve2UzUAcDhY+QcqHM5KKbdLMl1KeVat9fv9FoNfSLKUpCS5TpLfL6Xcr9b69TZrBRg3K8LwNye5W5KnJ/lemkUyX1NKObbWekr/2FeVUpaSvCrJ7lLKF2utu9upHIafQJyRU0qZ7C+geaUk/5zk6/1/QL6V5DFJfjvJ+Uk+UWv9Sv+cXpLvJ/lRS2UDAIeBFR+6H5nkiUmulmR7kjcn+U6t9ZJWC4RfzK2SPDzJ+lLKU5I8JE3Y8hdJtia5e5J/SPKhfij+5bYKBRhHpZQnJblNkj+stX66lPLXaX53/2+SF/Rbhb8oSWqt/1pK6Sb5lDAcLp9FNRkpy4tLlFKOSvKpJBelWVH5zbXWXSuOW9/vIb4hyY2TvDrJjiT30zIFALg8/Q/dz0hyRJqZtEcl6SV5UZLX9J9Mg6HXvxf+wyT/kuTfk+xM8tNa6z/0X59Mctc0sxGPTfI7yxNKADi0+rnGXyXZUWt9SSnlr5K8IE3b106Sf0vy60n+stb6itYKhcOQHuKMlH4YfkSSDyT5aZJHJHl9rXVXKeWI5b7i/TD8uDT/uLw2ycYk97eoJgCwP/1gcHmNkuckOTfJvZPcPskt0gTkz0zy//r3IjD0aq0LSd6a5HFJHprm3nhpxeu9JJ/o7/9ZkveWUm7bQqkAY6fWuiPNTPC3l1JuleQvk0wneU+t9dtJ3tI/9GX9meTAARL8MYrulORaSZ5Zaz2n3z7l3klek+Q9pZS/6R937yR3SfLVJHfuh+STZogDAPvq308cmeRhSa6S5H211m/WWi+stW6ttf5Oks8keVKSG7RXKVyx/jo7SZqJIknekOTPk8wluX8p5UYrXl/Kz0PxjUmetabFAoyBlb+XV6q1frrWekGSmyS5cpKzVqxjclSS96f5vfyBNSkURoQe4oyiXpo/VG9SStmR5P9L8ynqR9M86nmXUspZSd6e5LNJzqm11n5P0F47JQMAh4HHJDmlv/0XyzuXW7EleWSSr6cJzZ+95tXBAVix3s7mNAvMfy7NQprvTDKRZkG2fyilPLnWel6y5ynM/0vyO0n0EQcYoFLKhv4TOyml/F6Sqyf5QZq1Sb7TP+y4JFdNcqX+k2gbk/xqkq8kOcUaJvCLEYhzWCv9FST22f2jNDf2L0yyO8lCmgUo3lFKuUWSTye5Tj/8/s6KcRYDANC3vIDmil1vTLIpyXOT3K+U8p+11gv6YXiS1CQXJ9m8xqXCAVmeANLvS3t6mieGu0k+139a8i1JSpqF6Usp5UkrQ/E0PWv39/8NAH4B/afOHpjmibNt/X1vT3LfJBuSrE9yZinlNbXWf661nlZK+ZM0i3h/Ns3s8F9N87S7MBx+QVqmcNjq34jX0jiilLKxv++cJE9L8tf9r9/ph+ETaf5hOS/JhSvH2k+oDgCMsf4s2sX+/cUJSVJr/WmaoPAfk9wvyeNKKddecdrV08yw/d
maFwwHYPk9nebJye1p1tt5zXK43f9w581p2qf8bpIXlFKuv79x1q5q+LnLaivRf62sZS2wSqckeX2SR5VSNpdSHpPktmkWOr5pmhav25M8u5Ty5P45f5jkP5MckybTuGut9RtrXjmMgCIH5HC04lHPKyV5cZJbppmV9X9J/qG/+MTK449I8stJXpZmoaDfcCMPsDZKKeusz8DhZHn2a38W7X+mWZvkabXW9/Zfv0qSpyd5SpL/TvLBNDPH75/k6CS304aNYbP8u7iU8kdJ/i7J/0vyyeUJJisniJRS1if5ozSLzz+v1vq37VQNP7fyyYRSyqOTXD/NEzmnJ/nfWusu9xwcTkopH0zyG0memuTWSX6a5G+X7yFKKb+S5Plp1iZ5dK310/39G5I9CyMDB0EgzmFnxc38ldK0P9mZ5D1JbpymZ+fHkjxsORQvpVw1TQ/xe6UJw+/RfyTUo54MjZV94+BwVkpZDgVvkOR7ST5Ta/3+ZbS4gqGz/F7t32d8Pk0Pzxcm+VStdeeK445OclL/K2n6Ll+U5G/6Ybr7DIbCvu/FUsrJSf4sya36Tz0s719+729IM9FkKcl9knzYBzy0beV9RCnlXUnumuT8JNdJ0yLzzCQP33diFAyj5Ql+/e2PJPm1JD9OE4a/sZQymWSx/zv515LMJpmutb6ytaJhxGiZwmGj/4/C8qI+R6RZFPMnSe5fa31+mseGuknukuRd/T9kk+RGaWaQ/1+Su/fD8El/pDIsSimvTnL3tuuA1erPpv1cmv7KJyV5aZLPlFLuIwzncNH/43Ndklck2Zpmce7/rbXuLKX8UinlV0op10iyI8nJSf6+f+q3kvyjMJxh0p9IslhKOaaU8oL+7uVwe6l/zESy572/Mc3isbettS7WWj/QfyrT2lO0akUY/g9Jjk+yJc3fdtdM8qEkv5fkT9qrEA7cyt+rtdbfSvKRNE88PKKUcrV+WL78u/lTSb6bJucABkQgztArpdw82fsfjTSfoC4leXKt9Sf9WQJ3TjOL5d+S/FaSN5dSjq61fj7Jn9dan9wfY8IsF4ZFKeV6SR6aZnZL+iEMHHb6H1S+L83slocluUaa3ocLSd6yvx60MMTWJfmlJJ9eXlCwlPKoJB9O8yTaR9OsUbIzTTu2U5O8KMlflVKuIgxnGPRn1C71Q+4PJ7lLKeW6adoAbU7ykuTn/cD7/ZdvnuZDoNuuHMu9M8Ogf69x+yTvSPKl/geV10ry4CSv63/FBzgMq5U98PvZROlv3z/Je5P8ZpITSylXXX69fw+9Ic2Tl8CACF4YaqWUqyX5RCmlk+x1M/6FJO9P8qVSyklJppL8QT/8flaSc5Lcs3/ukcuPg/b/MPBHKsPkwiRzSW6SNE9AtFoNHLxfThMgviTJV/otgG6V5JpJnt9vm7IusegVw63//pxIclySqf5CVm9O8uo0sxD/IsmV0iw6mFrrhWmeinhBmtniJ3qP07b+zPDlpx1ukaadz2P6H/B8I8krk2wppby5lHL1/uKwv5HkX9K0I3xNS6XDHivDw/7v1aPSBOLba63zpZSbJPlqmg98/rK/78Qkv99GvXB59umBf/tSyq2SXGX59Vrr7yf5nyTPSTJTSvnlJA9K8jf94964xiXDSPPJKcNuZ5J/TPI3pZRP1Fp/PUn6veH+OUn6PbU+leSs/jmTaR4F/b80N/+7lwfzyD7Dpj+z5VtpWvusXMhNv2UONzdP86jnJ2utl5RS/jjNjfsza63P77dTeXopZabW+pNWK4UV9m1v0v/de0kp5RFpZmvdMMl3kvxmrfVj/XNOSHKdUsoRtdZLaq0XlVKen+SSJO/z+5u2rWgx+Kkk25PsrLV+rf/afCnl1CSLSR6X5gnLJNmW5imfE7T+oW0rJzKVUt6TZjLUe5N8P8kNSil3TLOo8f8kObF/T/3LSR6Y5D3F4poMmRXv5zckuV+S9UkuKKU8tNZ6Zv+Y+5RS/ivJH6f53fytJN9Osw7at9
qpHEaTQJyh1b8Jmi+l/EuaUPv5K0PxUsr6NP+I3CzJN2ut8/1Tr59mhvjTk5zVnx3jhp6hUUqZSXJskk+mWQBoLsmt+y1+tic+vOGw9I00v6vv2l+Q7Y1pFhf8x/6srt9KM6vr+mnWf4DWlVI21lp3l1I2J3l4kmsn+VqSr9Zaz+yHK5Npfi1v689WvGGa/rUfrbVesjxWrXWulPL3fn8zRK6e5p74AWmeqrxGkp+meT9fUEp5XpI3penF3O0f+85+GL5nwTdYa/vMpP2rNL2TX95vlfmWNE/k/HGS/6i1/mH/uGOT/HWS6yb5kDCcYVRKeXqSO6Z5r14ryUOSnF5KeUit9cNJUmv9vVLK29O0AnpuktPqikW9gcEo7tkZVis/1S/NApmPSPJPafrF/fqK4/4yzSP6r07TV+uBaW7q79afHWOmLUOjlHKLJKeleRz/Omn6LJc0Law+neZJh+8n+Waa0PDbtdYft1MtXLZ+gPgnSf69PyvrBmn60h6dJlR8Vq31Bf0w/KZp3vfnJnm4P1JpUynl1kl+t9b6D/2fj0nzAeVxSTamCcB3JvmzWuueWYallCsnuU2S5/ePuctyf0/3GQyr/n3HXyX5syRPqrXOHMA5JpIwFEopd0nz3v1mkpn+B5gb0gTiT0izdsO707Rne2iatUvuUWs96zKGhDW17+/TUsoLk/yk1vrC/s93TPIPaULyBy+H4v3X/iPJU80Mh0PDDHGG0oq2ERuSXL3Wel7/0aKafWaKJ/lgkquludm/MM0N0+/1/3j1qBxDpdb6jVLKr/ff31dLM3vr95I8Ocl8mkWs/jBNKLMjTX98GEb/mGbhtWuUUl5Ua/1ev9fyB5JckOQH/TYp907z/j4iySP8bqYt/Q9n1id5dpL79N+Hz0uz9sgPkzyy1vr5UsrvpQlg3llKeUCt9X2lWZTwxUnumuZDy/utWKhbcEjrLuu92L/vmEmyKcmLSinztdZ/XXHeng90lre9pxkGpZR/SHJSku8meXs/DJ+otS6UUv4+zX3zn6VZz+H8JOcl+fVa61daKxpW2LdneJIj00yK+szyMbXWz5VSnprmQ553llIeVGv9n/5rv7/2VcP4MEOcobMiDL9SmkWsrpRmkZSv9mckPjLN7Kyzaq1365+zIclV09zsf7ffJsWjngyN/gzEmmRpuS3KilmHd0jyuSS/XGv9eillU5qWKrXW+sP2qoZLW/HEzh3SfHizO8lMkhfXWi8updwrzYyta6SZLX5Omv7LD6q1dgWItK2UctMkp6Rp4fPaNP3v/zvJa1cEg7+S5qm049MELN/qz1S8VZLXaSnBMFl+L/bvH/4gzULdX0vyxVrr2f1jbpbkmWlaAz221vrq1gqGA1BKOTLJx5LcLsnrkjx+RYvM5WNulOZe46dJdizfY8MwKaW8Oclv5+cLaL4myV/VWnetOOZ2aWaK3zvJb9VaP7rWdcK4EYgzVJZnpvQDly+kmYX11jSzAnb2jzkyTRjz/OzTPmXFOGYfMjRKKS9OEx7+UppZiM9Jv7dhKWVdmuDwG0keXWt9R3uVwuXr//79QpoZ4KenWYDtL5JcL00Ifko/FL9BkqOS3DjJ15Oc3X+/CxBpRSnltmkW3P5q/714oyQvS3LLNB9A/n6tdbb0F8nsn/OANPcgf1Rrfc8+4/lgh6GwYiLJUUk+kuSYJBeneW9/NMkraq0f6B97syTPSPKwJCfVWl/cUtmwl31bT5VS1vc/RN+U5BNp3s9PSPLG/gxx9xMMrX1mhj8jzROVz0/zVMMTk9yo//1NtdaFFefdMc3v6KfVWr+55oXDmBGIM3T6C1b9W5rFMh9Ra/12f/+6NDNm64qZ4v+Q5Ee11lu2VjBcjlLK29I8Yv+vSTaneV//fpInJfnnWuvu/nHfSPL+WuuTWioVLle/3cQ/pVng53drrV/v7z8iyfvSLHj1/CSn7m/hHx9U0pZSytFJzkjy6lrr81c8nXOTNE83/E6SN9
daH94//oha6yWlWbx7LsnJy/3GYRj174tn07Rae0yt9dullE+labt2ZpLn1lr/u3/szdI8IXFMkrvrf0/b9gkPJ9Osq9Nd8cTO5iSfTTOB5KQ0a5csuK9g2JVS7p7mCYeLa62v6e87Osmn0rTN3PN+XnHOng/mgUNrXdsFwH5MpJlN++k0j9knSfo3PKW/PZ/kjWkC8R/0Q3QYKqWUh6d53P6P0gQqJ6XpQZs0s8V7/Q96kmY213VW/AxDpf+H6Q2TXLgyDO/ftP9umt6df5Xkyf0ZXcsh+vL5/milLTuS3Kcfhm9McuN+APPtJI9P8v4kDyilPC9J+mF4SbPA1e40T/bAUOq/V6fTvM//pB+GvyfNwt1PT/M+fm4p5b5JUpvF2R6XZuHBuvL3NKylUsrEPmH4s5K8Pcn/JnnJ8r1E/+++OyX5cZoP5h9WStngvoJhVkp5QpoPKp+TZGt/38Z+W587JvlJVryfl88ThsPaEbwwFJZvxvvB9nWS3CLJt5Z7gS8f15/RdZVSyq/UWi9O8vI0f+QuCsUZQjdNsjPJN/rv0Rsn+c8kb0nyzP6jntfpH/uGNKG5m3uGzorA5EdJrlJKuWGyJzhcDsX/Lc2aD3+S5E/6M7fMPKR1tfGd/vv43WnCllv1g5jvpfkg52NJpkspby6l3CfNQm0vSPK9NB/Aw1BZ/r3c/z371SQfr7X+sJTyiiS3SbKl1vqSNIsa3zbJX5dS/qB/zg/qzxc49nuaNVVKObKUclytdXFFGP62JI9Os4DmF9K0mPhof3b4ylD8/CSnpemVD0NjPx8ufjHJf6R5QvhOSVKbhWGP6L+f7xjvZ2iVQJxWLYfYyzfj/Ruj76b5B+TEUso1+osETa447cFJTi2lXKvWumt5dotengyhGyaZrLX+pJRy/SSfT/I/aR5nni+lPDrNDJgjk7ys1vrlNouF/dknMPlYmr7gD+33q105k+VKaW7qt6bpK75h37FgrS3fZ6x4H5+SpJvmvbocip+Tpjft6UkemqZv+F3T9K29sw/dGSb73jv3t/8zyfNLKb+U5J5JTk6zqGbSrOOwkOQ3+19ZcZ4P4VlT/XuH76Vpsba870Vp2ko8tN868II0TwzfLsnn9gnF75amfcrn17ZyuGz9e4nlFj8T/ScYPpnkqUk+nORp/RnjKyeTeD9DywTitKa/GMpiKWVzKeVxpZSTSimPLc2Cmu9Is0jb35dSrtkPxdeVUm6RZvbhXJqZikn2/qMA2lRKeWIp5cH9Hz+Y5GqllL9P07/2Q0n+rDaLDl43yW8k2Z5mPQfvYYZK/4b+iCTXWt5Xa31XmrY/z0nyhNIsTJhSyq2T3CdNq6snJrl1mpt8aM2K+4wjk7y8lPKgWutH06xBclyS12bvUPwv0/TD35Bmtu1J/UXdJn3ozjC4nHvno/prklwnyc2T/KA/E7GkWTT2X5P8WppWKdCK/gSntyc5O83v25RSbpPkKkn+ttb66VLK09J8oPPoNE/v3CrJ6Svap+ystf5av/UPtG6ftj/PTZNjfLGUMpNkfZp7jg+maV21MhTf6P0M7bKoJq3qzxL4bJKjkxyV5o/QbyV5WpJHJblHmt6dp6VpP/HraT7IucNySG52C8OilPLmNP3vP5vksWmCxH9P80jc52qtd+4fd+0kz0vyW0l+y00Qw6b/weQrk/xKmg8n357kJbXWb/Vnaj0nTfD9vTQf6hyXprfn8UkekOTVaRZr+9qlR4dDb/kP1P59xieSzPe/Pz3NfcSdk7wpzRMNf5rka/3jb5EmiHmaEJxhtJ9754k0k0SekGYthzekmWH71P7rL0xydq31T/vnT3hv04ZSyvWS/HeaRQT/sZTyL2n6KG9N05rq+CSvS/L0WuvrSrOuzv+l+X39wyQ368+qhaFTSnlnmgXmP5jkymk+nLxZmidzfpzk1DTZxjNqra9YcZ6JUdASgThrrj+7pdfffmOS66bpb3h+kl9O8vdpZrg8Nskt0/
TUukOaRz+/mOSxy21UlseBtpVSXpfmJucRSb5ea11ePOWWacLEI9Pc1G9Pc3N0mzT97ztt1AuXpR+Gfz5NwPKeJBelmUn7riT/WGs9o3/cljRtJa6d5vfzP/V/N78jyY2S/Hat9actXAIkSfozCj+W5vfunyf5fq11YcXrd0vz3t6eFaH4itcFhwyFA7h3flaambSPSPKracLxa6UJGn+Q5C7umWlbf4b4R9IsLP/dNPfCd0hyXv/+4Z+S3D3Jg2qtF/TPeVeae+hrJHlwrfU7rRQPl6OU8rg0E/oekuSMWutCKeURaT7g+eda6+NKKbdP8jdpJo48ttb6r60VDCQRiNOSfuByQpoZ32fWWt+y4rWrp1n06kpJfq3fa/l6SS5Y8ceAMJyhUUq5e5LXJ3l8rfX9/X1XT3L/NH+MXinJTdLMEOimaZ/yL7XWs9upGPavNKvcvy3NH59/VGvdWkp5a5J7JzkiTUuUp9daP98/fs+sllLKndN8kLklya/XWs9q4xpgWSnlD9M8jfOg5fdjKeWuaXos1zQ9w9eleQrtiCQnCFsYVgdw7/yuNDPCfy3NIpq/kmQxyWv7Tz+4d6Y1y/cLpVmU+8tpnmz4i1rra/uvT6T5EP6GtdZf7e9bbm3170neX2u9uJ3q4fKVZjHjmyX5/Vrrzv77/ItJ/ivN34c7+r/Dr5fkb5M8u9b6zfYqBpJk8ooPgcHq9zP8uyRPSnOj/gf9/RP9RTV/Ukp5TpL/TPKwJP+W5IcrenMVN/QMmauk+SP0u6WUY9LMFP/nNO/vayf5ZJJH1Fqf3VaBcIBummQpyQv6Yfjb0oQrd01yizRh+UmllBfUWj+7Igy/fppen7+S5G4WiGVIXDPJpiRzpZQbJ3l4kmckOSfNo8yfT/MBzmPTLAT7vXbKhMt3gPfOz03y3iQPrLX+e5p7j6w4zr0zrVnREuLRaZ7K2ZXk6aWUryb5Qv9Dm39N8u5+K5Wvpmk5eIckfy4MZ8jdOMnGfcLw/0nzoc/OUsr/S3KTWuvTSyl/Wn++ID3QIotqsub6N0QzaR4hSpqZLunfCC2/J7+W5ob/6OXX9jkfhsk30syofU2amQCvSzNT64Qkt0sTJt59+eD+H7YwjL6T5FVpFrB6XJo/Rh9Wa/16mtm0Z6UJEF9WSrnZ8km11u+nCRrvLQxniJye5KppHtF/b5LHpwm/75Cmz+fxSW5ea/3fWuuD+vchE61VC5fhF7h37iW5+n7O1/qHYfGvae6P/zDJJfn5WjtJ8zv7pCR/nGYW7e3TtF87r4U64RfRSXLtUsrD04ThH07yZ/0w/PpJ7pdkQynlCGE4DA8zxGlFrfX8UsozkmxO8tellG/XWv95xQKZ102yI4nZAAy9/kKD90kTCH4pyYtrre9KklLKL6dZKPZHK473oQ5Dqda6u5Tykf5jzXdPE4B/sv/aXCnl/DQ3+tdLE56vPPe7a14wXI5a61mllN9I0x/8K0k+Xmv9cj/0PjbJN5PM7XOO4JCh9AvcO+9orUi4Av0P0JMkpZT/L01LlDeWUh5ea/1sklP7T6cdmeRntdaftVQq/CLekORx/e//nabt4FK/ndXfJplK8lRhOAwXPcRpVSnlGklenuRBSV6a5FNJNqZZ/OqIJMf745TDxb4LsJVSrpXmEed7JPnNWusPWysOfgH9wPA/0oSG96+1/qyUcpskr0wyvaKHuEUHOayUUtan6fP5yiTzSe63IlCEoefemVHRf2LyDmlC8SPSzAz/ovcvh6NSym+l6YP/nTRPCi+leRrtzmn+DvxSi+UB+yEQp3X9G/uXJHlwkpLk+Wke93xcrfUSgQuHo1LKI9Ms3Pa7aRZqs8Agh5VSyvFpZod/MM3N/d3TPN58d7+TORyVUq6aJjS8T5pZtneqtfZKKeuE4hxO3DszKlaE4v+a5umz3661fqHdquDglFJun+Q5SW6Y5p75jCQvrLV+o9XCgP0SiDMUSinXTPJPSR6R5E9rrW
/o799Qa11otTj4BZVS7pLkZWla/jyu1vrVlkuCg1JK+fUkr04ykablxENqrV1hC4ejUsq9kzw7TRurR/fD8EmLDXI4cu/MqOiH4ndO8qIkf1Jr/c4VnAJDq5SyIc19czdJ3GPA8BKIMzT6N/YvT/LAJCfWWv+t5ZLgoPQXuLppkgtrrT9tux5YjVLKldI8ynxhv7e4AJHDUj90uW6S8/rvZR/scFhz78woKaVsrLXubrsOAMaDRTUZGrXWH5VSHpfm09RXl1IWaq1vbLsu+EX1H73/Ztt1wCDUWi9Of4HjfmsJYTiHpf6Cxj9I9ryXheEc1tw7M0qE4QCsJYE4Q6XW+uNSyhPT9Nz6Ytv1APBz+iwzKryXGRXunQEAfnFapjCUPMYMAAAHxr0zAMCBW9d2AatVSnlwKeVlpZRPlFK2l1JqKeXf266L1XFDDwAAB8a9MwDAgRuFlil/k+Q2afqbnpfkFu2WAwAAAADAMDrsZ4gn+eskN0tydJI/b7kWAAAAAACG1GE/Q7zWevrydimlzVIAAAAAABhiozBDHAAAAAAArpBAHAAAAACAsXDYt0wZhN/4jd+obdcAgzAzM5MkmZ6ebrUOWC3vZUaJ9zOjxPuZUeG9zCjxfmbUzM7OjmJP5MM+e5ybm8uWLVvyhCc8IVu2bGm7nFW9R8wQBwAAAABgLAjEAQAAAAAYCwJxAAAAAADGgkAcAAAAAIDLVOth3wZ9D4tqAgAAAACMsPe///055ZRTVj3OxMTEAKpp12EfiJdSfj/J7/d/vGb/+11KKa/rb2+ttT55jcsCYJWmpqYyOzvbdhkwEJ1Op+0SAACAMfblL395IOP0er2BjNOmwz4QTzKV5JH77LtR/ytJvp9EIA5wmOl0Opmenm67DFi1mZmZtksAAADG3EknnZSTTjppr31LS0vp9Xrpdrvp9XpZWFjY8/PyvuXtbdu25bnPfW5KKS1dweAc9oF4rfXZSZ7dchkAAAAAAIeNdevWZcOGDdmwYcMVHjs3N3foC1ojFtUEAAAAAGAsCMQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEgEAcAAAAAYCxMtl0AAAAAAACHzhe+8IU8+9nPTpKUUlJrvdztfS0tLa1JnWtBIA7AUJqamsrs7GzbZcBAdDqdtksAAADG2Ctf+crs3Lmz7TKGgkAcgKHU6XQyPT3ddhmwajMzM22XAAAAjLlSStslDA2BOAAAAADACDv55JPzzne+M8mBtUzpdrvZtWtXdu/enfn5+Wzfvj1nn312O8UPmEAcAAAAAGCEffjDH8673vWuVY8zCjPNBeIADCU9xBkleogDAABtuuSSSwYyzmUtunk4EYgDMJT0EGdU6CEOAAC07cQTT8yJJ554hcctLi6m1+ul2+2m2+2m1+tlYWEhF154YZ7whCesQaWHnkAcAAAAAIBMTExkYmIiRxxxxF77jzzyyJYqGrx1bRcAAAAAAABrwQxxAAAAAIARdu655+b1r399kmZhzOVe4Je1vWxpaSndbje7du1a24IPIYE4AAAAAMAIe97znpezzz571eOUUgZQTbsE4gAAAAAAI+zUU0/NZz7zmST7nxW+PBN8eSHNfRfVvPjii/P+97//UjPID0cCcQAAAACAEXalK10pv/Vbv3XQ58/NzeX973//ACtqj0AcAAAAAGCE7dy5MxdccMGlZn8vb1/Rzzt37mz7EgZGIA4AAAAAMMKe/OQn5xvf+Maqx7nOda4zgGraJRAHYChNTU1ldna27TJgIDqdTtslAAAAY+yOd7zjQALxrVu3DqCadgnEARhKnU4n09PTbZcBqzYzM9N2CQAAwJg744wzBjLOwsLCQMZpk0AcAAAAAGCEPfaxj82pp5560Of3er2ce+65qbUOsKp2CMQBAAAAAEbYueeem3POOWfV40xMTAygmnYJxAEYSnqIM0r0EAcAANp05plnDmScXq83kHHaJBAHYCjpIc6o0EMcAABo2zOe8Y
w84xnP2PNzrTWLi4vpdrvp9Xrpdrt7thcWFvbsW/4+NzeX5z3veSmltHgVgyEQBwAAAAAYI6WUTE5OZnLywOLhubm5Q1vQGlrXdgEAAAAAALAWBOIAAAAAAIwFgTgAAAAAAGNBIA4AAAAAwFgQiAMAAAAAMBYE4gAAAAAAjIXJtgsAAAAAAKBdS0tL6fV66Xa7e76Wf77wwgvbLm9gBOIAAAAAACPstNNOy6c//em9Qu59txcXF69wnI0bN65BtYeWQByAoTQ1NZXZ2dm2y4CB6HQ6bZcAAACMsdNPPz3nnXfeQZ27efPmbNq0Kcccc0xue9vbDriytScQB2AodTqdTE9Pt10GrNrMzEzbJQAAAGPuMY95TL70pS9d7gzxy3vtkksuybe//e189rOfzQMe8IC2L2dVBOIAAAAAACPsbne7W+52t7sd9Plzc3PZsmVLlpaWBlhVOwTiAAAAAABjrNaaxcXF/c4SX1hYyLZt29oucWAE4gAAAAAAI+yVr3xl3vGOd7RdxlBY13YBAAAAAAAcOmeeeeZAxpmYmBjIOG0yQxyAoTQ1NZXZ2dm2y4CB6HQ6bZcAAACMsWtf+9r59re/vepxer3eAKppl0AcgKHU6XQyPT3ddhmwajMzM22XAAAAjLmNGze2XcLQ0DIFAAAAAGCE3frWtx7IOJOTh//86sP/CgAAAAAAuEz3u9/9cr/73e+gz9+2bVse+MAHZnFxcYBVtUMgDgAAAAAwwubn53PGGWfs97WlpaX0er10u929vlbuu/jii9e44kNHIA4AAAAAMMKe+MQn5pvf/OaqxymlDKCadgnEAQAAAABG2NOe9rS85jWvOahzFxcXs2vXrpx11lmptQ64srUnEAcAAAAAGGE3vOENc/LJJx/0+XNzc9myZcsAK2rPurYLAAAAAACAtWCGOAAAAAAAqbVmaWkpCwsLey2qeeGFF7Zd2sAIxAEAAAAARthpp52WN77xjW2XMRQE4gAMpampqczOzrZdBgxEp9NpuwQAAAAiEAdgSHU6nUxPT7ddBqzazMxM2yUAAABj7qijjhrIOOvWHf5LUgrEARhKZogzSswQBwAA2nTBBRcMZJylpaWBjNMmgTgAQ8kMcUaFGeIAAEDbbnrTmw5knImJiYGM0yaBOABDyQxxRokZ4gAAQJvOOOOMgYzT7XYHMk6bBOIADCUzxBkVZogDAABte+Yzn5lnPvOZe36utWZxcTHdbjfdbje9Xi8LCwvp9Xp77Vvenpuby8knn6yHOAAAAAAAh5dSSiYnJzMxMZH169en2+3u+b7ya3nfJZdc0nbJAyMQBwAAAAAYYS960Yvyvve9b9XjlFIGUE27Dv857gAAAAAAXKZBhOFJsnHjxoGM0yaBOAAAAADACHv+85+fjRs3ZuPGjdm0adMVbq/8OuKII/aMs3v37havYjC0TAEAAAAAGGF3vOMd88EPfvAXOmdxcXHPwpo/+9nP8qd/+qeptR6iCteOQBwAAAAAYIS95S1vyb/+67+uepxR6CEuEAdgKE1NTWV2drbtMmAgOp1O2yUAAABj7PTTTx/IOKMwQ1wPcQAAAACAEXbkkUe2XcLQMEMcgKHU6XQyPT3ddhmwajMzM22XAAAAjLnHPvaxOfnkk1NrzdLSUrrd7p7+4MtfS0tLbZe5JgTiAAwlLVMYJVqmAAAAbZqZmckPfvCDVY+jhzgAHCJmiDMqzBAHAADaduqpp+brX/96kr1D7eXZ4stfK2eNr9zesWNH3vnOd45ED3GBOAAAAADACNu0aVNud7vbHfT5c3Nzeec73znAitpjUU0AAAAAAMaCGeIAAAAAACPsm9/8Zl7ykpckOfA+4CuP6/V6h6SuNgjEARhKFtVklFhUEwAAaNNLX/rSPT3EV8OimgBwiFhUk1FhUU0AAKBtp556as4+++wkTahda02tNYuLi1lYWLjUYprL+5a3L7744rztbW+zqCYAAAAAAMNtcnIyxx13XJKk1r
on+O52u5mcnNwrEN933+TkZBYXF1u+gsERiAMAAAAAjLAnPvGJOeuss9ouYygIxAEAAAAARtijHvWo/NM//VOSn7dMubztfS0uLmbr1q1rU+whJhAHAAAAABhhU1NTeetb33rQ58/NzWXLli0DrKg969ouAAAAAAAA1oJAHAAAAACAsaBlCgAAAADAmKq1ZnFxMd1uN71eL91u91Lb27Zta7vMgRGIAwAAAACMsJe+9KV5z3ve03YZQ0HLFAAAAACAEXb00UcPZJx16w7/OPnwvwIAAAAAAC7Tpz/96YGMs7S0NJBx2qRlCgAAAADACHv605+e1772tUmSUsoBn7e0tJRer5ddu3al0+kcourWlkAcAAAAAGCE3eAGN8jf//3fH/T5c3Nz2bJlywArao+WKQAAAAAAjAUzxAEAAAAARtjFF198uX3EFxcXs7i4mIWFhfR6vXS73XS73T3bF1988RpWe2gJxAEAAAAARtgDH/jAdLvdVY+zYcOGAVTTLoE4AENpamoqs7OzbZcBAzEqi88AAACHp/Xr1w8kEK+1DqCadgnEARhKnU4n09PTbZcBqzYzM9N2CQAAwJh78IMfnDe84Q2rHqfX6w2gmnYJxAEYSmaIM0rMEAcAANq0c+fOgYxjhjgAHCJmiDMqzBAHAADa9vjHPz6Pf/zjL/P1paWlPQto9nq9Sy2uuW3btjzlKU9Zw4oPHYE4AAAAAMAYW7duXTZs2HCZi2bOzc2tbUGH0Lq2CwAAAAAAgLVghjgAAAAAwAj70Y9+lLe//e1JklLKFR5fa92rfcquXbsOdYlrRiAOAAAAADDCHvawhw1knCOOOGIg47RJIA7AUJqamsrs7GzbZcBAdDqdtksAAADG2PWvf/18//vfX/U4vV5vANW0SyAOwFDqdDqZnp5uuwxYtZmZmbZLAAAAxty//du/Zdu2bUmSpaWldLvdvVqi9Hq9y923Y8eOnHbaaVlcXGz5SlZPIA4AAAAAMMImJiZy3HHHHfT5c3NzOe200wZYUXvWtV0AAAAAAACsBTPEAQAAAABG2OLi4qpbpowKgTgAAAAAwAh78pOfnE6n03YZQ0EgDgAAAAAwwh7+8Ifn+9//fpKklJJa6+Vu72txcTHbt29fm2IPMYE4AAAAAMAIu/3tb593v/vdB3z84uLiXu1TLrzwwpx44omHsMK1IxAHAAAAABhhb3zjG3Paaae1XcZQEIgDMJSmpqYyOzvbdhkwEHr1AQAAbdq1a1fbJQwNgTgAQ6nT6WR6errtMmDVZmZm2i4BAAAYcw9+8INzxzveMb1eL91ud087lOXt5a9LLrkku3btyvz8fObn57N79+7Mz89n+/btOfvss9u+jIEQiAMAAAAAjLCTTjppIIH2L/3SLw2gmnYJxAEAAAAARth97nOf/PCHP8zi4uIVHltrzeLiYnq93qVeu+CCCw5FeWtKIA4AAAAAMMIe9KAH5UEPetAvdE6tdU9bla1bt+aRj3xkut3uIapw7QjEAQAAAABGWLfb3Wt299LS0n57ifd6vSwsLOzZt/x9x44dLVY/WAJxAAAAAIAR9uQnPzlnnXVW22UMhXVtFwAAAAAAwKFz5zvfue0ShoYZ4gAMpampqczOzrZdBgxEp9NpuwQAAGCMffKTn2y7hKEhEAdgKHU6nUxPT7ddBqzazMxM2yUAAABj7tRTT80PfvCDJEkpJUtLS1lcXNzTO3zfXuL77tuxY0fe9KY3tXwVgyEQBwAAAAAYYRs2bMiNb3zjgz5/bm5uZAJxPcQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEgEAcAAAAAYCxYVBMAAAAAgL3UWrO4uJhut5sdO3a0Xc7ACMQBAAAAAEbY2972trzqVa9a9TiTk4d/nHz4XwEAI2lqaiqzs7NtlwED0el02i4BAAAYY4MIw5Nm1vjhTiAOwFDqdDqZnp5uuwxYtZmZmbZLAAAAxtwTnvCEvPSlL227jKEgEAcAAA
AAGGFbtmzJli1bDurcxcXFbN26NQ996EOzuLg44MrW3rq2CwAAAAAAYDhNTEzkiCOOaLuMgRGIAwAAAAAwFgTiAAAAAACMBYE4AAAAAABjQSAOAAAAAMBYEIgDAAAAADAWBOIAAAAAAIwFgTgAAAAAAGNBIA4AAAAAwFgQiAMAAAAAMBYE4gAAAAAAjAWBOAAAAAAAY0EgDgAAAADAWBCIAwAAAAAwFibbLgAAAAAAgEOr1pper5der5eFhYX0er10u910u929tve3b/v27W2XPzACcQAAAACAEfaMZzwjn/70p9suYyhomQIAAAAAMMLue9/7DmScUspAxmmTGeIAAAAAACPs13/913P66acf9Plzc3PZsmVLaq0DrKodZogDAAAAADAWBOIAAAAAAIwFgTgAAAAAAGNBIA4AAAAAwFiwqCYAAAAAwAhbWFjID37wgyRJKSW11vR6vXS73fR6vSwsLOz5eXnfyu87duxo+QoGRyAOAAAAADDCHvnIR+ZHP/rRqseZmJgYQDXt0jIFAAAAAGCEDSIMT0YjEDdDHAAAAABghL3jHe/I+9///iQ/b5lyWduLi4t7tUvpdrvZuXNnPvaxj2VhYaG1axgUgTgAAAAAwAg77rjj8shHPvKgz5+bm8vHPvaxAVbUHi1TAAAAAAAYCwJxAAAAAADGgkAcAAAAAICxIBAHAAAAAGAsCMQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEw2XYBAAAAAAAcOmeeeWae+9znJklKKQd0zsrjlpaWDkldbRCIAzCUpqamMjs723YZMBCdTqftEgAAgDH2hje8Idu2bWu7jKEgEAdgKHU6nUxPT7ddBqzazMxM2yUAAABj7pRTTtkTiC8tLaXb7abb7abX62VhYSG9Xu9y9+3YsSOnnXZay1cxGAJxAAAAAIARNjExkeOOO+6gz5+bmxuZQNyimgAAAAAAjAWBOAAAAAAAY0EgDgAAAADAWBCIAwAAAAAwFgTiAAAAAACMBYE4AAAAAABjQSAOAAAAAMBYEIgDAAAAADAWBOIAAAAAAIwFgTgAAAAAAGNBIA4AAAAAwFgQiAMAAAAAMBYE4gAAAAAAjAWBOAAAAAAAY0EgDgAAAADAWBCIAwAAAAAwFgTiAAAAAACMBYE4AAAAAABjQSAOAAAAAMBYEIgDAAAAADAWJtsuAAAAAACAtVFrzeLiYrrdbnq9Xrrd7l5fK/f1er0sLCzkoosuarvsgRGIAwAAAACMsOc85zk5/fTTVz1OrXUA1bRLyxQAAAAAgBF24xvfeCDjTE4e/vOrBeIAAAAAACNsdnZ2IOMsLi4OZJw2CcQBAAAAAEbYPe5xj4GMMzExMZBx2nT4z3EHYCRNTU0N7BNsaFun02m7BAAAYIyde+65Axmn1+sNZJw2CcQBGEqdTifT09NtlwGrNjMz03YJAADAmHvSk56U+9///kmSUkpqrVlcXMzu3bszPz+f+fn57Nq1K7t27drr5+XXt2/fnrPPPjullJavZPUE4gAAAAAAI+yJT3xivva1r7VdxlAQiAMAAAAAjLC//Mu/zEtf+tIkP58hfnnb++r1ejn77LPXpthDTCAOAAAAADDCbnGLW+SVr3zl5R5Ta023202v10u3292zvbCwkG3bto1MW1OBOAAAAADACPuXf/mXvPWtb227jKGwru0CAAAAAAA4dCYnBzMvehQW1RSIAwAAAACMsC9+8YttlzA0tEwBAAAAABhh09PTecUrXvELnbNyNvgll1ySb3zjG5e56ObhRCAOAAAAADDCbnazm+UlL3nJQZ8/NzeXLVu2DLCi9miZAgAAAADAWBCIAwAAAAAwFgTiAAAAAACMBYE4AAAAAABjQSAOAAAAAMBYEIgDAAAAADAWBOIAAAAAAIwFgTgAAAAAAGNhsu0CAAAAAAA4dHq9Xi644IIkSSklvV4vvV4v3W53z9e+P6/ct2PHjpavYHAE4gAAAAAAI+xJT3pSzj
rrrLbLGApapgAAAAAAjLA73/nObZcwNMwQB2AoTU1NZXZ2tu0yYCA6nU7bJQAAAGPsIx/5SNslDA2BOABDqdPpZHp6uu0yYNVmZmbaLgEAAGAgSiltl7BqAnEAAAAAgBH2rGc9K6eddtpBndvtdrNr16586UtfSq11wJWtPYE4AAAAAMAIu8ENbpDnPOc5B33+3NxctmzZMsCK2mNRTQAAAAAAxoJAHAAAAACAsSAQBwAAAABgLAjEAQAAAAAYCwJxAAAAAADGgkAcAAAAAICxIBAHAAAAAGAsCMQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEgEAcAAAAAYCwIxAEAAAAAGAsCcQAAAAAAxoJAHAAAAACAsSAQBwAAAABgLAjEAQAAAAAYCwJxAAAAAADGwmTbBQAAAAAAcOh885vfzMtf/vIkSSkltdbL3d5Xr9dbm0LXgEAcgKE0NTWV2dnZtsuAgeh0Om2XAAAAjLGXvOQl+frXv77qcUopA6imXQJxAIZSp9PJ9PR022XAqs3MzLRdAgAAMOZe/OIX51vf+tZe+2qtWVxcTLfbTbfbTa/Xy8LCQnq93l77ut1uduzYkbe//e2XOYP8cCIQBwAAAAAYYUcccUR+9Vd/9aDPn5uby9vf/vYBVtQei2oCAAAAADAWBOIAAAAAAIwFLVMAAAAAAEbYRz7ykZx88smrHmfdusN/frVAHIChNDU1ldnZ2bbLgIHodDptlwAAAIyxQYThSTIxMTGQcdp0+Ef6AAAAAABcpj/6oz8ayDhLS0sDGadNZogDMJQ6nU6mp6fbLgNWbWZmpu0SAACAMXfiiSfmxBNPvMLjlpaW0uv10u1293zvdru58MIL8/jHPz6Li4trUO2hJRAHAAAAACDr1q3Lhg0bsmHDhr32b9q0qaWKBk/LFAAAAAAAxoJAHAAAAACAsSAQBwAAAABgLAjEAQAAAAAYCwJxAAAAAADGgkAcAAAAAICxIBAHAAAAAGAsCMQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEgEAcAAAAAYCwIxAEAAAAAGAsCcQAAAAAAxoJAHAAAAACAsSAQBwAAAABgLAjEAQAAAAAYCwJxAAAAAADGgkAcAAAAAICxIBAHAAAAAGAsCMQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEgEAcAAAAAYCwIxAEAAAAAGAsCcQAAAAAAxoJAHAAAAACAsSAQBwAAAABgLEy2XQAAAAAAAIfORz7ykZx88smrHmfdusN/frVAHIChNDU1ldnZ2bbLgIHodDptlwAAAIyx//u//xvIOIuLiwMZp00CcQCGUqfTyfT0dNtlwKrNzMy0XQIAADDmnv3sZ+/ZrrWm2+2m1+ul2+3u2V5YWLjUvuXtubm5vPCFL0wppb2LGBCBOAAAAADACLvwwgvzve99b0/IfUUB+L7Hzc/Pt30JAyMQBwAAAAAYYSeddFLOPvvsgzp38+bN2bx5c65//evn5je/+YArW3sCcQCGkh7ijBI9xAEAgDZt2rTpoM+dn5/P/Px8LrroouzcuXOAVbVDIA7AUNJDnFGhhzgAANC2U045JXNzcwfULmV/+3bs2JE3v/nNOe+883L88ce3fTmrIhAHAAAAABhh69evz9WudrWDPn9ubi5vfvObB1hRe9a1XQAAAAAAAKwFgTgAAAAAAGNBIA4AAAAAwFgQiAMAAAAAMBYE4gAAAAAAjIXJtgsAAAAAAKAdi4uL6Xa7e756vd5e2wsLC5mbm2u7zIERiAMAAAAAjLCXv/zlede73rXqcTZv3jyAatolEAdgKE1NTWV2drbtMmAgOp1O2yUAAABj7JxzzhnIOEcfffRAxmmTQByAodTpdDI9Pd12GbBqMzMzbZcAAACMuRe+8IXZvn37nnYoy61Q9m2PclmtU3bs2JHXv/71+dGPftT2payaQBwAAAAAYIRNTEzkmGOOOejz5+bm8vrXv36AFbVnXdsFAAAAAADAWh
CIAwAAAAAwFgTiAAAAAACMBYE4AAAAAABjQSAOAAAAAMBYEIgDAAAAADAWBOIAAAAAAIwFgTgAAAAAAGNBIA4AAAAAwFgQiAMAAAAAMBYE4gAAAAAAjAWBOAAAAAAAY0EgDgAAAADAWBCIAwAAAAAwFgTiAAAAAACMBYE4AAAAAABjQSAOAAAAAMB+LS4u5pJLLmm7jIGZbLsAAAAAAAAOnf/6r//Kv//7v2dpaekKj+12u+n1enu+rzxncvLwj5MP/ysAYCRNTU1ldna27TJgIDqdTtslAAAAY+y9731vfvKTn6x6nGtf+9oDqKZdAnEAhlKn08n09HTbZcCqzczMtF0CAAAw5k4++eR873vf22v29/LXyp8va3t+fj6f+MQncu655+b2t79925ezKgJxAAAAAIARdvWrXz1Xv/rVD/r8ubm5fOITnxhgRe0RiAMAAAAAjLBer5eLLrpov7PAFxYWrnDfxRdf3PYlDIxAHAAAAABghD3lKU8ZyNpG17zmNVdfTMsE4gAMJYtqMkosqgkAALTpJje5yUD+Ltm+ffvqi2mZQByAoWRRTUaFRTUBAIC2Pe5xj8vjHve4/b5Wa92rPcru3bszPz+fXbt2ZdeuXZmfn8/WrVvz0pe+NPPz82tc+eAJxAEAAAAARthpp52WN77xjW2XMRTWtV0AAAAAAACHTillqMZpkxniAAAAAAAj7FGPelQe9ahHHdCxi4uLWVxcTLfbTa/Xy8LCQi688MI89rGPTa31EFd66AnEAQAAAABIkkxMTGRiYiIbNmzYs2/9+vUtVjRYWqYAAAAAADAWzBAHAAAAABgTS0tL6fV6e7VEWf75svZddNFFbZc9MAJxAAAAAIAR9uxnPzsf+9jHVj3OKPQQ1zIFAAAAAGCE3e1udxvIOBMTEwMZp01miAMAAAAAjLDjjjsuxx13XJKklLJnpvdlbe9rcXEx27Zty9LS0toUfAgJxAEYSlNTU5mdnW27DBiITqfTdgkAAMAYe9nLXpatW7e2XcZQEIgDMJQ6nU6mp6fbLgNWbWZmpu0SAACAMXfOOecMZJwNGzYMZJw2CcQBGEpmiDNKzBAHAADatGHDhiwsLKx6nKmpqdUX0zKLagIAAAAAjLBBhOFJcsEFFwxknDYJxAEAAAAARthNb3rTgYwzMTExkHHapGUKAENJD3FGhR7iAABA2xYXFwcyzve+973c9ra3HchYbTFDHAAAAABghN373vduu4ShYYY4AEPJopqMEotqAgAAbfqv//qvgYxz4xvfeCDjtEkgDsBQ0jKFUaFlCgAA0Lbzzz9/IOPUWgcyTpsE4gAMJTPEGSVmiAMAAG265S1vma9//eurHueCCy7IbW5zmwFU1B49xAEAAAAARtjGjRsHMs6uXbsGMk6bzBAHYChpmcKo0DIFAABo2ymnnJL5+fksLi5mcXExCwsL6fV66Xa76Xa7e23vb9/27dvz6le/uu3LGAiBOAAAAADACFu3bl2udKUrHfT5c3NzIxOIa5kCAAAAAMBYEIgDAAAAADAWtEwBAAAAABhji4uL++0p3uv1srCwkG3btrVd4sAIxAEAAAAARtjLXvayvPvd7267jKGgZQoAAAAAwAi7xjWuMZBx1q07/ONkM8QBAAAAAEbYQx7ykDzkIQ/Z8/PS0lJ6vd5ebVL2bZeysLCQhYWFzM/PZ+vWrXnpS1+apaWlFq9iMATiAAAAAAAj7PnPf37++7//e9XjHHXUUQOopl0CcQCG0tTUVGZnZ9suAwai0+m0XQIAADDGtm/fPpBxNm7cOJBx2iQQB2AodTqdTE9Pt10GrNrMzEzbJQAAAGPuec97XhYXF/dqibK8vbCwcIX7tm/fnle+8pX56U9/2valrJpAHAAAAABghJVSMjk5mcnJy4+Da617gvOV4fiFF164RpUeegJxAAAAAIARdtppp+WNb3zjqseZmJgYQDXtWtd2AQAAAAAAHDqDCMMTgTgAAAAAAGNiw4
YNbZewagJxAAAAAACu0MLCQtslrJoe4gAAAAAAI+yd73xnPvjBDyZpFtistV7mdq/Xu9SimvPz8zn99NMF4gAAAAAADLeLLrooX/nKVw7q3KWlpezevXvAFbVHIA7AUJqamsrs7GzbZcBAdDqdtksAAADG2Ate8IJ885vfbLuMoSAQB2AodTqdTE9Pt10GrNrMzEzbJQAAAGPu1FNPzZe+9KX9vrZ79+7Mz89n165d2bVrV+bn5/f8vPza9u3bc/bZZ69x1YeGQBwAAAAAYIS95jWvyXve855Vj3PkkUcOoJp2CcQBGEpapjBKtEwBAADaNIgwPEmufvWrD2ScNq1ruwAAAAAAAIbf+eef33YJq2aGOABDSQ9xRoUe4gAAQNtuetObjkwP8NUSiAMwlLRMYZRomQIAALRpaWlpIOMsLCwMZJw2CcQBGEpmiDMqzBAHAADaVkoZqnHapIc4AAAAAMAI+83f/M2BjLNu3eEfJ5shDsBQ0jKFUaJlCgAA0KaPfOQjAxnnute97kDGaZNAHIChpGUKo0LLFAAAoG3Pfe5z853vfCe9Xi/dbnfP9/1tLywsXOq4+fn5fOYzn8n3v//93O52t2v7clZFIA4AAAAAMMKuda1r5VrXutZBnz83N5ctW7YMsKL2HP5NXwAAAAAA4ACYIQ4AAAAAMMZqrVlcXNzTOmXfdioXXnhh2yUOjEAcAAAAAGCEvfrVr86nP/3pvfqF79s//EBs2rTpEFd66AnEARhKU1NTmZ2dbbsMGIhOp9N2CQAAwBj7+Mc/nvPOO+9S+ycnJ7N58+Yce+yx2bRpUzZv3rzX9+XtzZs355hjjslv/uZvtlD9YAnEAQAAAABG2C1ucYts3bo13W43i4uLe/b3er1s374927dvP6BxNm3alHve856Hqsw1IRAHYCh1Op1MT0+3XQas2szMTNslAAAAY+6Zz3zmnu2lpaU9rVJ6vV4WFhb2ap2yv31zc3M55ZRTctFFF7V4FYMhEAcAAAAAGBPr1q3Lhg0bsmHDhgM+ZzkQHwXr2i4AAAAAAADWgkAcAAAAAICxoGUKAAAAAMAIO/fcc/O6170uSVJKSa31creXLS0tpdvtZteuXWta76EkEAcAAAAAGGGPfOQjBzLOpk2bBjJOmwTiAAylqampzM7Otl0GDESn02m7BAAAYIwde+yxufDCC1c1xpFHHpnjjz9+QBW1Rw9xAAAAAIARdtWrXnXVY+zcuTM//elPB1BNu8wQB2AodTqdTE9Pt10GrNrMzEzbJQAAAGNu3brBzIveunXrQMZpk0AcgKGkZQqjRMsUAACgTTt37syNbnSj3Ote98rk5GTWr1+/52vlz5f32hFHHJErX/nKbV/KqgnEAQAAAABGWCkl5513Xt7znvdcYQA+OTmZDRs2XOq4TZs25f73v3+OOeaYti9nVQTiAAwlLVMYFVqmAAAAbXvUox6VM888M91uN71eL91ud6/thYWFzM/PZ2Fh4TJf73a7OeaYY3L/+9+/7ctZFYE4AAAAAMAIO+GEE3LCCScc9Plzc3PZsmVLFhcXB1hVOwTiAAAAAAAjbMeOHfnUpz6VpGmfUmvda7vWutfM8H1nku/cubPN8gdKIA4AAAAAMMIe+MAHptfrrXqcDRs2DKCadgnEARhKU1NTmZ2dbbsMGIhOp9N2CQAAwBgbRBiejEYgvq7tAgAAAAAAOHSuetWrDmSc293udgMZp00CcQAAAACAEfazn/1sIOOcccYZAxmnTVqmADCUOp1Opqen2y4DVm1mZqbtEgAAgDH3hCc8IS996UtXPc6uXbsGUE27BOIAAAAAACPs1re+dZ7ylKek1+ul2+2m2+3utX15Py8sLGTHjh05++yzs7i42PalrJpAHIChZFFNRolFNQEAgDadfPLJ+e53v7vf19avX5/169dncnJyz/cNGzZkcnJyz/bmzZtzpzvdaSR6iAvEAQAAAABGWLfbzVFHHZUb3vCG2bRpUz
Zt2pTNmzdn48aNewXi+wbhyyH5+vXrs3Hjxlz72tdu+1JWTSAOAAAAADDCjjnmmJx33nk566yzVjXO0572tPz2b//2gKpqx7q2CwAAAAAA4NDZuXPnQMa55jWvOZBx2mSGOABDqdPpZHp6uu0yYNVmZmbaLgEAABhzf/mXf5kzzjgjSbK4uPgLLarZ7XZzySWX5Ic//GG++93vZmpqqt2LWSWBOAAAAADACJuamlpVkD03N5ctW7YMrqAWCcQBAAAAAEbY+eefn7e85S1JklJKaq2Xu50kS0tL6fV6WVhYyK5du9a+6ENEIA4AAAAAMMIe/vCHD2ScjRs3DmScNgnEARhKU1NTmZ2dbbsMGIhOp9N2CQAAwBg77rjjsnXr1lWP0+12B1BNuwTiAAwli2oyKiyqCQAAtO3oo48eSCC+tLQ0gGraJRAHAAAAABhhJ598ct75zncmaXqFX5HlvuG7d+/O/Px8tm/fnrPPPnuvHuOHK4E4AAAAAMAIm52dzXe+8510u930er10u939bi8sLKTX611m8L1hw4Y1rnzwBOIADCU9xBkleogDAABtet/73pfzzz//oM7duHFj1q9fn2OOOSbHH3/8gCtbewJxAIaSHuKMCj3EAQCAth133HEHHYjv3r07u3fvzo4dO/KFL3whv/M7vzPg6taWQByAoWSGOKPEDHEAAKBNP/3pTwcyzh3veMeBjNMmgTgAQ8kMcUaFGeIAAEDbfvjDHw5knDPPPDP3ute9BjJWW9a1XQAAAAAAAMPvGte4RtslrJoZ4gAMJS1TGCVapgAAAG269a1vnbPOOmvV45x//vm59a1vPYCK2iMQBwAAAAAYYXNzc0mSDRs2ZP369ZmcnMz69ev3fC3/PDk5mVJKkqSUklrrnu1jjz02t7/97du6hIERiAMAAAAAjLClpaXc8573zLOe9ay2S2mdHuIAAAAAAIwFM8QBAAAAAEbcmWeemac97Wl7tUuZnJzMhg0bLrVvf9sbN27Mne985xxxxBFtX8qqCMQBGEqdTifT09NtlwGrNjMz03YJAADAmLvyla+c8847L5/73OdWNc5Tn/rU3Pe+9x1QVe0QiAMwlKampjI7O9t2GTAQnU6n7RIAAIAxtmvXroGMs3v37oGM0yaBOABDyQxxRoUZ4gAAQNvWrRvMUpK3utWtBjJOmwTiAAwlM8QZJWaIAwAAbfr2t789kHEWFhYGMk6bBvPRAAAAAAAAI+3cc89tu4RVM0McgKGkZQqjQssUAACgbX/8x3+cN73pTaseZ3FxcQDVtMsMcQAAAACAEfblL395IONc/epXH8g4bTJDHIChpIc4o0QPcQAAoE23utWtctZZZ616nLm5udUX0zKBOABDScsURoWWKQAAQNu++93vDmSco48+eiDjtEkgDsBQMkOcUWKGOAAA0Kbzzz//Co8ppWT9+vVZv359Jicn9/q+fv363OxmN8sd7nCHNaj20BKIAzCUzBBnVJghDgAAtO288867wmNqrVlYWMjCwsJ+Xz/nnHNyz3veM8cff/ygy1tTAnEAhpIZ4owSM8QBAIA2Xfe6183ExETufve77zXre9/tDRs2ZHJyMhMTEymlpNaapJk9ftRRR+XmN795y1eyegJxAAAAAIARd6Mb3SiPetSjUkppu5RWCcQBAAAAAEbYzp07c/rpp+f0009f1Thvf/vbc7WrXW1AVbVDIA7AUNJDnFGhhzgAANC2bdu2DWScubk5gTgAHAp6iDNK9BAHAADadL3rXS8/+MEPLrV/cnIymzdvzqZNm7Jp06a9tjds2LBXb/GNGzfm2GOPbaH6wRKIAzCUzBBnVJghDgAAtO0xj3lMOp1Oer1eut1uut3uZW5v3749P/vZz/bat7CwkIsvvjjXuMY18oAHPKDty1kVgTgAAAAAwAi7613vmrve9a4HdW6tNVu3bs1DHvKQLC0tDbiytScQBwAAAAAYYR/4wAfyhS98Yc+s716vl4WFhS
ucMb78fdnExESLVzEYAnEAhpIe4owSPcQBAIA2veUtb8l55513qf379hA/+uijL9VLfOX2CSec0EL1gyUQBwAAAAAYYccee+x+A/Fer5ft27dn+/btVzjGEUcckRve8Ia5/e1vfyhKXDMCcQCGkkU1GRUW1QQAANr2whe+MD/72c8udzHNy3ttx44dedvb3pZzzz1XIA4AAAAAwPDasGFDrnWtax30+XNzc3nb2942wIras67tAgAAAAAAYC0IxAEAAAAAGAtapgAAAAAAjLDZ2dmceeaZe3qDLywspNfr7bV9WX3EFxYW0u12kyTr1h3+86sF4gAMpampqczOzrZdBgxEp9NpuwQAAGCMnXbaafnRj36Uq1zlKlm/fn0mJyezfv36vbY3bdq03/3LXxs3bszd7373ti9l1QTiAAAAAAAjrNaa613vern3ve99qdB7w4YNlxmET05OppSSJNm0aVOOOeaYlq9k9QTiAAAAAAAj7LzzzkuSvOpVr1rVOKeeempue9vbDqKk1hz+TV8AAAAAALhM1772tQcyzg1ucIOBjNMmM8QBGEqdTifT09NtlwGrNjMz03YJAADAmPvhD384kHG++93vHvZtUwTiAAwli2oySiyqCQAAtOk+97lPPvShD616nG3btg2gmnYJxAEYSmaIMyrMEAcAANrW7XYHMs5FF100kHHapIc4AAAAAMAIu9Od7jSQcSYnD//51Yf/FQAAAAAAcJnufe975973vvdBnz83N5ctW7ZkcXFxgFW1QyAOAAAAAECWlpbS6/XS7Xb3fO92u7nwwgvbLm1gBOIAAAAAACPsNa95Td70pje1XcZQ0EMcAAAAAGCErV+/fiDjlFIGMk6bzBAHAAAAABhhj3zkI/PIRz7ygI6ttabX6+3VMuVnP/tZHvOYx6TWeogrPfQE4gAAAAAAJGlmga9fvz7r16/Ppk2bkiQTExMtVzU4WqYAAAAAADAWzBAHAAAAABgTi4uL6Xa7e7VE2d/PK/dddNFFbZc9MAJxAAAAAIAR9nd/93f5+Mc/vupxRqGHuJYpAAAAAAAj7B73uMdAxhmFXuJmiAMAAAAAjLAvf/nLufKVr5xSyl6tUZaWln6hcTZv3nyIKlw7AnEAhtLU1FRmZ2fbLgMGotPptF0CAAAwxr74xS8edB/wzZs3Z/PmzTnmmGPyK7/yKwOubO0JxAEYSp1OJ9PT022XAas2MzPTdgkAAMCYe+ITn5ivfOUrWVhYyO7duzM/P59du3Zl165dmZ+fv9TPu3bt2nPu8utbt27NZz7zmWzZsqXFK1k9gTgAAAAAwAh773vfm0984hPp9XqrGufYY48dUEXtEYgDMJS0TGGUaJkCAAC06eyzz95vGD45OZnNmzdn06ZN2bRp057tffctt0y5y13u0kL1gyUQBwAAAAAYYXe/+93zwQ9+MAsLC1lYWEi3202S9Hq9bN++Pdu3bz+gcdatW5f73ve+h7LUQ04gDsBQ0kOcUaGHOAAA0Lavfe1r2bZt26rH2b179wCqade6tgsAAAAAAODQueMd79h2CUPDDHEAhpIe4owSPcQBAIA2feQjH2m7hKEhEAdgKGmZwqjQMgUAAGjbxMTEQMa5wQ1uMJBx2iQQB2AomSHOKDFDHAAAaNNl9f6enJzM5s2bs2nTpmzatGnP9sp9mzZtyvr167Np06bc6EY3WuPKB08gDsBQMkOcUWGGOAAA0LY/+ZM/yRlnnJFut5ter5dut7tne2FhYc++HTt2ZNu2bZd6vdvtJkmOPPLI3P/+92/5alZHIA4AAAAAMMLuda975V73utdBn79t27Y88IEPzOLi4gCrase6tgsAAAAAAGB4lVLaLmFgzBAHAAAAABhhP/jBD3L22Wdfql3K/rb3t2/Xrl1tX8LACMQBAAAAAEbYs5/97JxzzjkHde7mzZuzefPm3OQmN8mtb33rAVe29gTiAAylqampzM7Otl0GDESn02m7BAAAYIxNTh
58DDw/P5/5+fls3bo1F1xwQW584xsPsLK1JxAHYCh1Op1MT0+3XQas2szMTNslAAAAY67WOpBxNm3aNJBx2mRRTQAAAACAEXaPe9xjIONccMEFAxmnTQJxAAAAAIAR9vGPf3wg4/R6vYGM0yaBOAAAAADACJuYmBjIOEcdddRAxmmTHuIADCWLajJKLKoJAAC06dxzzx3IOLe61a0GMk6bBOIADCWLajIqLKoJAAC0befOnQMZZ9euXQMZp00CcQCGkhnijBIzxAEAgFEwqGC9TXqIAwAAAABwhX784x+3XcKqmSEOAAAAADDCrnvd62Z+fj63uMUtMjk5mcnJyaxbd+m50qWU1Fr3u33ta187J5xwwprWfSgIxAEAAAAARtzU1FSe9axntV1G6wTiAAAAAAAj7rzzzst73/verF+/PpOTk9mwYUMmJyezfv36y923vL1hw4asX7++7ctYNYE4AAAAAMAIu8Y1rpEvfvGL+da3vrWqcf7xH/8xd77znQdUVTsE4gAMpU6nk+np6bbLgFWbmZlpuwQAAGDM3epWt8p5552XXbt2ZdeuXel2uwc1zrZt2wZc2doTiAMwlKampjI7O9t2GTAQnU6n7RIAAIAxdvrpp+fHP/7xQZ27efPmbN68OVe5ylVyu9vdbsCVrT2BOABDyQxxRoUZ4gAAQNtOPPHEdDqd9Hq9dLvddLvdy9ze376dO3dm69at+cxnPpMHPOABbV/OqgjEAQAAAABG2N3udrfc/va3v1TwvW8ovrCwsN99O3bsyGte85osLS21fSmrJhAHAAAAABhhJ510Uj7/+c+vaoxSSq55zWsOqKL2CMQBGEp6iDNK9BAHAADatNowPElqrbnWta41gGrata7tAgAAAAAAOHQ2btw4kHHWr18/kHHaJBAHAAAAAGAsaJkCwFDqdDqZnp5uuwxYtZmZmbZLAAAAxtzCwsJAxul0OrnOda4zkLHaIhAHAAAAABhh7373u/Oxj33sco9ZWlpKt9tNr9fLwsJCer1eut1uut1udu7cmQ996EMDC9bbJBAHAAAAABhhV77ylXP/+9//oM+fm5vLhz70oQFW1B6BOAAAAADACNu1a1d+8pOf7JkBvjzze+X25b128cUXt30JAyMQBwAAAAAYYU95ylPy1a9+9aDPX79+fY466qjc4AY3GFxRLRGIAzCUpqamMjs723YZMBCdTqftEgAAgDF20UUX5aijjsoNb3jDbNq0KZs2bcrmzZv3+n5Z+zZt2pQNGzbkiCOOyJFHHtn2payaQBwAAAAAYIRt3rw55513Xs4666xVjfO85z0vd73rXQdUVTsE4gAMpU6nk+np6bbLgFWbmZlpuwQAAGDMfetb3xrIOHNzcwMZp00CcQCGkpYpjBItUwAAgDZd7WpXy09/+tNVj7O0tDSAatolEAdgKJkhzqgwQxwAAGjbve51r7z5zW9e9TiLi4sDqKZdAnEAAADGjqfRGCWeRgOuSCllIOPUWgcyTpsE4gAAAIwdT6MxKjyNBhyIRz/60Xn0ox99hcfVWtPr9dLtdtPtdvdsX3jhhXnc4x63BpUeegJxAAAAAABSSsn69euzfv36vfZv3LixpYoGb13bBQAAAAAAwFoQiAMAAAAAMBa0TAEAAAAAGGG9Xi/btm3bb3/wy/p55b4dO3a0fQkDIxAHAAAAABhhT3nKU9LpdFY1xuTkZK5znesMpqAWCcQBGEpTU1OZnZ1tuwwYiNXeeAIAAKzGIP4m6fV6uepVr7r6YlqmhzgAAAAAwAi70pWuNJBxjjzyyIGM0yaBOAAAAADACLv44osHMs73v//9gYzTJoE4AAAAAMAIu8lNbjKQcSYnD/8O3If/FQAwkjqdTqanp9suA1ZtZmam7RIAAIAx9+Mf/3gg4/zwhz/M7W9/+4GM1RYzxAEAAAAARtiOHTsGMs66dYd/nGyGOAAAAADACHvXu96VD33oQ0mSUkpqrfvdXlpaSrfbTbfbTa
/X27O9c+fOfPSjH83CwkJr1zAoAnEAAAAAgBF24YUX5owzzjioc2ut2b1794Arao9AHIChNDU1ldnZ2bbLgIHodDptlwAAAIyxF73oRfnGN77RdhlDQSAOwFCyqCajwqKaAABA20499dR8+ctfvtT+paWl7Nq1K/Pz89m1a9ee7eWfd+/enfn5+Wzfvj1nn312C5UPnkAcAAAAAGCEveIVr8j73//+VY+z3G/8cHb4LwsKAAAAAMBluulNbzqQcSYmJgYyTpvMEAcAAAAAGGEPeMAD8oAHPOCgzq215mc/+1n+4A/+IEtLSwOubO2ZIQ4AAAAAwH6VUjI5OTrzqgXiAAAAAACMhdGJ9gEAAAAAuJTFxcXMz8+n2+2m1+ul2+3u2V5YWLjCfTt27Gj7EgZGIA4AAAAAMMJOOumkfOELX1jVGOvWrcs1r3nNAVXUHoE4AENpamoqs7OzbZcBA9HpdNouAQAAGGPbtm076HM3b96czZs35+ijj85Vr3rVAVbVDoE4AEOp0+lkenq67TJg1WZmZtouAQAAGHPPe97z8t3vfvdy26RcXsuUXbt25VOf+lS++tWv5mY3u1nbl7MqAnEAAAAAgBG2uLiYhYWF/Ybfy9tX9NqoEIgDAAAAAIywv/3bv80555xzUOcut0y58Y1vnF/91V8dcGVr74AC8VLKg5PcI8lUktskOSrJm2qtD9/PsddL8vQkt09y/STHJPlZku8kOS3Jv9da9/uRQinlkUkel+RWSRaTnJnklFrr+y7j+Nv1/1t37/93fpTkfUn+vtb64wO5NgCGkx7ijBI9xAEAYLSVUq6V5J+S/E6a7PScJH9ea/1Yq4X1Xe1qVzvoQHx+fj7z8/PZunVrfvzjH+cmN7nJgKtbWwc6Q/xv0gThFyc5L8ktLufYGyf54ySfTfIfSS5MctUk900TiP9JKeXetdbeypNKKackeVJ//Fcn2ZDkoUn+q5Tyl7XWl+9z/O8meXf/Gv4rybf6dT02ye+VUu5aaz33AK8PgCGjhzijQg9xAAAYbaWUqyT5ZJL/S3K/JD9NcqMkP2mxrL1c97rXzWc/+9lVj/OTnwzHJZVSvpdmMva+PlBrvd/lnXuggfhfpwmqv51mpvjpl3Psp5IcU2td2qfI9Uk+nOSEJA9M8vYVr/1amjD8O0mOr7Vu6+9/YZIvJjmllPK+Wuv3+vs3JnlNkvVJHlRrffeKsR6W5M1JXp7k/gd4fQAAAAAAB+OpSS6otT5ixb7vtlXMvhYXFzM/Pz+QsUopAxlnAI5PMrHi52ulyZHfvv/Df27dgYxeaz291np2rbUewLEL+4bh/f3dNDPGk+Sm+7z82P73k5fD8P4530vyiiRHJHnUiuN/Lck1knxhZRjeP+ctSb6U5HdLKfv7lAAAAAAAYFB+P8lnSylvK6X8pJTSKaU8vgxBery4uJinPvWp+Z//+Z+BjLd+/fqBjLNatdaf1lp/tPyVplXN9hxAIL5mi2qWUibSFJYkZ+3z8j373/97P6d+MMmz+sf8XX/fNfvfL6vxzTlpWrzcM8lrD6ZeANqlhzijRA9xAAAYaTdK8hdJXpymj/hUkpf1X3v5ZZyzJj73uc/l61//enq93hUffAC+9rWv5dhjj80d73jHTExMXPEJa6D/wcP/S7N25a4rOv6QBeKllOOSPD5JSXK1JPdKcpMkb661/teK445Mcp0kF9daL9jPUGf3v99sxb6t/e83vIz//I36329+cNUDAAAAAByQdWk6WTy9//OZpZSbJnlcWg7Ezz777OzevXtg433gAx/I6aefnlve8pZ5wQteMCyh+L3S5MSvPpCDD+UM8ePy8xndSVKTnJLkGfscd+X+94suY5zl/VdZse+TSeaSHF9KeUCt9T+XXyilPCTN7PAkOeYXrhqAoWBRTUaFRTUBAGDkXZDka/vs+3qSv2qhlr3c9KY3zcaNG7Nr188nTm/cuDHT09O5zW1uk263m16vl4WFhfR6vX
S73T37lre//vWv5z//8z/3zDLftWtXvva1r+Vzn/tc7nKXu7R1aSudmOTztdYvHcjB5QDagu99Qim/kWZRzTfVWh9+AMdPpJkBviXJc9K8Oe5Xa72w//q1k5yf5Pxa63X3c/76JAtJFmqtR6zY/4gkr0sTtL83zUzyWyT53TQ9xKeSvKrW+ue/0AUCAAAAABygUsqbk1yv1vrrK/Y9N8mDaq23aq+y5IQTTphI8qEkd0qyOcl8ks8muc/pp5++eIBjPCvJs7P3epRLSf7u9NNPf95AC/4FlVKunuS8JI+rtR7QDPFDHojvc+5Dk7wlyStqrY/v7zsyycVpWqYctZ9zjkvy0yQ/qbVeY5/XTkjytPz8f9CvJzk1ydWTvDDJc2qtfxcAAAAAgEOglHJ8kk+lCY3fluS2SV6T5Bm11le0WNrIK6U8Nc36k9eqtV58IOes2aKafR/sf/+N5R211p2llPOTXKeUcq399BG/af/7t/YdrNZ6eppwfi+llDf0Nz+/6ooBAAAAAC5DrfXzpZTfT/IPacLZc/vfX9lmXaOuv5jmo5O89UDD8GTvae5r4Tr97/sua/rR/vff3s85993nmMtVSrlKkt9LM6v8f37B+gAAAAAAfiG11vfXWm9Ta91Ya71ZrfWl9RdtzcEv6jfSTKY+oFYpywYeiJdSbtfvG77v/isleUn/x/fv8/Kr+t+fWUo5ZsU5N0izGuslSV67z3j7a6+yOcnr0yzA+be11ksO7ioAAAAAABhWtdbTa62l1vq5X+S8A+oh3p/y//v9H6+Z5D5Jzknyif6+rbXWJ/eP/Y8kd03TN+fcNI3ar5dmpvdV+vvvs+809lLKi5I8MU0T9Hcm2ZDkD5NcNclf1lpfvs/xj0/ypCSzaVZyvWqameHXSvKSWuv0FV8+AAAAAADj4kAD8WcnubzFKb9fa71B/9j7JXlYkjsmuUaaxS63JTkryduTnFZr3bdlyvJ/50/TzAi/VZqVSs9I8sJa6/v2c+ydk/xtmib1V02yPcnnkrys1vrBfY8HAAAAAGC8HVAgDgAAAAAAh7u1XlQTAAAAAABaIRAHAAAAAGAsCMQBAAAAABgLAnEAAAAAAMaCQBwAAAAAgLEgEAcAAAAAYCwIxAEAAAAAGAsCcQAAAAAAxoJAHAAAAACAsSAQBwAAAABgLPz/VRjDNY5/f3cAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"mn.matrix(df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fill in missing values for categorical values"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"sex 0\n",
"embarked 2\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.select_dtypes('object').isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Keep rows whose port is unknown by giving them an empty-string category.\n",
"df['embarked'] = df['embarked'].where(df['embarked'].notnull(), '')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Tangent: `catboost` is nice\n",
"\n",
"Minimal processing or tuning is required to use `catboost`, making it a nice \"default\" algorithm."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# %pip installs into the environment of the running kernel; a `python3`\n",
"# found on PATH is not guaranteed to be the interpreter the kernel uses.\n",
"%pip install --quiet catboost"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import catboost"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Seeded hold-out split used only for the catboost demonstration.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    df,\n",
"    target,\n",
"    random_state=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Library defaults throughout: no hyperparameter tuning.\n",
"cb = catboost.CatBoostClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# The string columns are declared as categorical so that catboost encodes\n",
"# them itself; verbose=0 silences the per-iteration training log.\n",
"cb.fit(\n",
"    X_train,\n",
"    y_train,\n",
"    cat_features=['sex', 'embarked'],\n",
"    verbose=0,\n",
");"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.8170731707317073"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cb.score(X_test, y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Category encoding"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Vanilla encoding\n",
"\n",
"For variables with only a few distinct values, one-hot encoding (dummy variables) is often used. For variables with many distinct values, we can use hash encoding, which is the same idea but uses a hash function to bin the values into a fixed number of columns.\n",
"\n",
"We may choose to drop one of the created columns to avoid multicollinearity."
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" fare | \n",
" sex_male | \n",
" embarked_C | \n",
" embarked_Q | \n",
" embarked_S | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 29.0000 | \n",
" 0 | \n",
" 0 | \n",
" 211.3375 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 0.9167 | \n",
" 1 | \n",
" 2 | \n",
" 151.5500 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1 | \n",
" 2.0000 | \n",
" 1 | \n",
" 2 | \n",
" 151.5500 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1 | \n",
" 30.0000 | \n",
" 1 | \n",
" 2 | \n",
" 151.5500 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1 | \n",
" 25.0000 | \n",
" 1 | \n",
" 2 | \n",
" 151.5500 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass age sibsp parch fare sex_male embarked_C embarked_Q \\\n",
"0 1 29.0000 0 0 211.3375 0 0 0 \n",
"1 1 0.9167 1 2 151.5500 1 0 0 \n",
"2 1 2.0000 1 2 151.5500 0 0 0 \n",
"3 1 30.0000 1 2 151.5500 1 0 0 \n",
"4 1 25.0000 1 2 151.5500 0 0 0 \n",
"\n",
" embarked_S \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(df, drop_first=True).head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Target encoding\n",
"\n",
"We can use the target to find a more informative encoding. Note that these methods leak information and are prone to over-fitting."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"\n",
"# Hide FutureWarning noise (presumably raised by the encoder cells below).\n",
"warnings.simplefilter(action='ignore', category=FutureWarning)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"import category_encoders as ce"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# No `cols` argument: the encoder decides which columns to encode.\n",
"te = ce.TargetEncoder()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Target encoding only acts on categorical columns, so pass the string-typed\n",
"# ones: each category is replaced by a smoothed mean of `target`.\n",
"# (Passing only numeric columns leaves the frame unchanged.)\n",
"te.fit_transform(df.select_dtypes('object'), target).head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Split data into train and test data sets\n",
"\n",
"Before we go further, we split into test and train data sets to avoid data leakage."
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Seed the split so that a fresh run reproduces the same rows in the saved\n",
"# train/test files.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    df, target, random_state=0\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Category encoding"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### We will be conservative and avoid risk of leakage\n",
"\n",
"Note that we don't bother to drop columns: multicollinearity is only a problem when fitting linear models without regularization, which is rarely done in ML (cf. statistics)."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# use_cat_names=True names each new column after its category value.\n",
"ohe = ce.OneHotEncoder(\n",
"    cols=['sex', 'embarked'],\n",
"    use_cat_names=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Learn the category levels from the training split only, then encode both splits.\n",
"ohe.fit(X_train)\n",
"X_train = ohe.transform(X_train)\n",
"X_test = ohe.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" sex_male | \n",
" sex_female | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" fare | \n",
" embarked_S | \n",
" embarked_C | \n",
" embarked_Q | \n",
" embarked_ | \n",
"
\n",
" \n",
" \n",
" \n",
" | 414 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" 34.0 | \n",
" 1 | \n",
" 0 | \n",
" 21.0000 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 287 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 61.0 | \n",
" 0 | \n",
" 0 | \n",
" 32.3208 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1279 | \n",
" 3 | \n",
" 0 | \n",
" 1 | \n",
" 14.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.8542 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 711 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" 28.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.2500 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 358 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" 42.0 | \n",
" 0 | \n",
" 0 | \n",
" 13.0000 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass sex_male sex_female age sibsp parch fare embarked_S \\\n",
"414 2 1 0 34.0 1 0 21.0000 1 \n",
"287 1 1 0 61.0 0 0 32.3208 1 \n",
"1279 3 0 1 14.0 0 0 7.8542 1 \n",
"711 3 1 0 28.0 0 0 7.2500 1 \n",
"358 2 0 1 42.0 0 0 13.0000 1 \n",
"\n",
" embarked_C embarked_Q embarked_ \n",
"414 0 0 0 \n",
"287 0 0 0 \n",
"1279 0 0 0 \n",
"711 0 0 0 \n",
"358 0 0 0 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Impute missing numeric values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Vanilla imputation\n",
"\n",
"A simple imputation is to fill with mean or median."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.impute import SimpleImputer"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Replace each missing entry with the mean of its column.\n",
"si = SimpleImputer(strategy='mean')"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pclass | \n",
" sex_male | \n",
" sex_female | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" fare | \n",
" embarked_S | \n",
" embarked_C | \n",
" embarked_Q | \n",
" embarked_ | \n",
"
\n",
" \n",
" \n",
" \n",
" | 414 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" 34.0 | \n",
" 1 | \n",
" 0 | \n",
" 21.0000 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 287 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 61.0 | \n",
" 0 | \n",
" 0 | \n",
" 32.3208 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1279 | \n",
" 3 | \n",
" 0 | \n",
" 1 | \n",
" 14.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.8542 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pclass sex_male sex_female age sibsp parch fare embarked_S \\\n",
"414 2 1 0 34.0 1 0 21.0000 1 \n",
"287 1 1 0 61.0 0 0 32.3208 1 \n",
"1279 3 0 1 14.0 0 0 7.8542 1 \n",
"\n",
" embarked_C embarked_Q embarked_ \n",
"414 0 0 0 \n",
"287 0 0 0 \n",
"1279 0 0 0 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.select_dtypes('number').head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The code below illustrates simple imputation, but we will use a fancier imputation method instead.\n",
"\n",
"```python\n",
"X_train[X_train.select_dtypes('number').columns] = \\\n",
"si.fit_transform(X_train.select_dtypes('number'))\n",
"X_test[X_test.select_dtypes('number').columns] = \\\n",
"si.transform(X_test.select_dtypes('number'))\n",
"```"
]
},
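{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Fancy imputation\n",
"\n",
"This does essentially the same thing as the `mice` package in R: each feature with missing values is modeled as a function of the other features, and the estimates are refined over several rounds."
]
},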
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.experimental import enable_iterative_imputer\n",
"from sklearn.impute import IterativeImputer"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# A fixed seed keeps the iterative imputation reproducible.\n",
"ii = IterativeImputer(random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# Learn the imputation model from the training split only.\n",
"X_train[X_train.select_dtypes('number').columns] = ii.fit_transform(\n",
"    X_train.select_dtypes('number')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# Reuse the imputer fitted on the training split; do not refit on test data.\n",
"X_test[X_test.select_dtypes('number').columns] = ii.transform(\n",
"    X_test.select_dtypes('number')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0, 0)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.isnull().sum().sum(), X_test.isnull().sum().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Simple example to illustrate differences"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Toy data: the two columns agree wherever both values are present;\n",
"# the last three rows contain the gaps to be imputed.\n",
"x = np.array(\n",
"    [\n",
"        [10, 10],\n",
"        [1, 1],\n",
"        [2, 2],\n",
"        [10, 10],\n",
"        [10, np.nan],\n",
"        [np.nan, 10],\n",
"        [np.nan, np.nan],\n",
"    ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[10. , 10. ],\n",
" [ 1. , 1. ],\n",
" [ 2. , 2. ],\n",
" [10. , 10. ],\n",
" [10. , 6.6],\n",
" [ 6.6, 10. ],\n",
" [ 6.6, 6.6]])"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"si.fit_transform(x)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[10. , 10. ],\n",
" [ 1. , 1. ],\n",
" [ 2. , 2. ],\n",
" [10. , 10. ],\n",
" [10. , 9.99968523],\n",
" [10.00094638, 10. ],\n",
" [ 7.1668244 , 7.1666142 ]])"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ii.fit_transform(x)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# to_csv does not create missing directories, so make sure `data/` exists\n",
"# before writing (e.g. on a fresh checkout).\n",
"os.makedirs('data', exist_ok=True)\n",
"\n",
"# Imputed but not yet standardized splits.\n",
"X_train.to_csv('data/X_train_unscaled.csv', index=False)\n",
"X_test.to_csv('data/X_test_unscaled.csv', index=False)\n",
"y_train.to_csv('data/y_train_unscaled.csv', index=False)\n",
"y_test.to_csv('data/y_test_unscaled.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Standardize data"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# Standardize each feature to zero mean and unit variance.\n",
"scaler = StandardScaler()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Estimate the scaling statistics on the training split only, then apply\n",
"# them to both splits so no test information leaks into the transform.\n",
"scaler.fit(X_train)\n",
"X_train.iloc[:, :] = scaler.transform(X_train)\n",
"X_test.iloc[:, :] = scaler.transform(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save processed data for future use"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# to_csv does not create missing directories, so make sure `data/` exists\n",
"# before writing (e.g. on a fresh checkout).\n",
"os.makedirs('data', exist_ok=True)\n",
"\n",
"# Fully processed (encoded, imputed, standardized) splits.\n",
"X_train.to_csv('data/X_train.csv', index=False)\n",
"X_test.to_csv('data/X_test.csv', index=False)\n",
"y_train.to_csv('data/y_train.csv', index=False)\n",
"y_test.to_csv('data/y_test.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.5 64-bit",
"language": "python",
"name": "python38564bit02a66c47ce504b05b2ef5646cfed96c2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}