def load_rating_train_dev_test(movies: pd.DataFrame, train_max_year=2015, dev_max_year=2017,
                               sample_count: int = None, random_state=None):
    """
    Split the movies dataframe chronologically into train / dev / test sets.

    Rows are assigned by ``startYear``: up to ``train_max_year`` (inclusive) go to train,
    later years up to ``dev_max_year`` (inclusive) go to dev, everything after goes to test.
    Rows whose ``startYear`` is missing (NaN) satisfy none of these comparisons and are
    therefore left out of every split.

    :param movies: Movies dataframe; the ``startYear`` and ``averageRating`` columns are read
    :param train_max_year: cut year for training (inclusive)
    :param dev_max_year: cut year for dev (inclusive); test starts right after it
    :param sample_count: number of rows to sample before splitting (useful for testing the
        code). Ignored when it is None (or 0); capped at ``len(movies)`` so that asking for
        more rows than available does not fail
    :param random_state: forwarded to ``DataFrame.sample`` so the sample can be reproduced.
        Only used when ``sample_count`` is given
    :return: dict with the keys ``X_train``, ``y_train``, ``X_dev``, ``y_dev``, ``X_test`` and
        ``y_test``. Each ``X_*`` is a list with one dict per row (every column is kept,
        including ``averageRating``); each ``y_*`` holds the ``averageRating`` values of
        the same rows
    """
    if sample_count:
        # DataFrame.sample raises when asked for more rows than exist (replace=False),
        # so cap the request instead of failing on small dataframes.
        n_rows = min(sample_count, len(movies))
        movies = movies.sample(n_rows, random_state=random_state)

    start_year = movies.startYear
    splits = {
        'train': movies[start_year <= train_max_year],
        'dev': movies[(start_year > train_max_year) & (start_year <= dev_max_year)],
        'test': movies[start_year > dev_max_year],
    }

    res = {}
    for split_name, split_df in splits.items():
        res[f'X_{split_name}'] = split_df.to_dict(orient='records')
        res[f'y_{split_name}'] = split_df.averageRating.values
    return res
import transformers +from sklearn.pipeline import make_union, make_pipeline +from sklearn.feature_extraction import DictVectorizer + + +def get_features_pipe( + use_years: bool, use_genre: bool, + use_director: bool, director_kws: dict = None, post_processing=None): + steps = [] + if use_years: + steps.append(make_pipeline(transformers.YearsAgo(), DictVectorizer(sparse=False))) + + if use_genre: + steps.append(make_pipeline(transformers.GenreDummies(), DictVectorizer(sparse=False))) + + if use_director: + director_kws = director_kws or {} + # cuando hacemos **director_kws usamos ese diccionario para pasar parametros + steps.append(make_pipeline(transformers.DirectorFeatures(**director_kws), DictVectorizer(sparse=False))) + + res = make_union(*steps) + if post_processing: + res = make_pipeline(res, post_processing) + return res + + +def get_model_pipe(features_pipe, model): + return make_pipeline(features_pipe, model) \ No newline at end of file diff --git a/lib/transformers/__init__.py b/lib/transformers/__init__.py index 9ba235e..1a88dcc 100644 --- a/lib/transformers/__init__.py +++ b/lib/transformers/__init__.py @@ -1,3 +1,3 @@ -from .director_features import DirectorFeatures +from .director_features import CrewFeatures, DirectorFeatures from .genre_dummies import GenreDummies from .years_ago import YearsAgo \ No newline at end of file diff --git a/lib/transformers/director_features.py b/lib/transformers/director_features.py index 9638237..1ed8f2b 100644 --- a/lib/transformers/director_features.py +++ b/lib/transformers/director_features.py @@ -2,8 +2,9 @@ import pandas as pd -class DirectorFeatures(BaseEstimator, TransformerMixin): - def __init__(self, min_cnt_movies=2): +class CrewFeatures(BaseEstimator, TransformerMixin): + def __init__(self, field, min_cnt_movies=2): + self.field = field self.min_cnt_movies = min_cnt_movies def fit(self, X, y): @@ -11,33 +12,30 @@ def fit(self, X, y): # Llevamos las cosas de nuevo a un DataFrame y calculamos features por 
director directors_stats = ( pd.DataFrame(X) - .groupby('director') - .agg({ - 'tconst': 'count', - 'averageRating': ['mean', 'max', 'min'], - 'numVotes': ['mean', 'min', 'max']} + .groupby(self.field) + .agg( + n_films=('tconst', 'count'), + min_rating=('averageRating', 'min'), + avg_rating=('averageRating', 'mean'), + max_rating=('averageRating', 'max'), + min_votes=('numVotes', 'min'), + avg_votes=('numVotes', 'mean'), + max_votes=('numVotes', 'max'), ) ) - # Para hacer flattening de las columnas - # https://stackoverflow.com/questions/14507794/pandas-how-to-flatten-a-hierarchical-index-in-columns - directors_stats.columns = [ - '_'.join(i) - for i in zip(directors_stats.columns.get_level_values(1), directors_stats.columns.get_level_values(0)) - ] - # Guardamos las estadisticas self.directors_stats_ = directors_stats # Diccionario con los datos para los directores comunes self.directors_stats_lk_ = ( - directors_stats[directors_stats.count_tconst >= self.min_cnt_movies].to_dict(orient='index') + directors_stats[directors_stats.n_films >= self.min_cnt_movies].to_dict(orient='index') ) # Valor default para los que consideramos que tenemos demasiado poca data - self.default_ = directors_stats[directors_stats.count_tconst < self.min_cnt_movies].mean(0).to_dict() + self.default_ = directors_stats[directors_stats.n_films < self.min_cnt_movies].mean(0).to_dict() if self.min_cnt_movies > 1: - self.default_ = directors_stats[directors_stats.count_tconst < self.min_cnt_movies].mean(0).to_dict() + self.default_ = directors_stats[directors_stats.n_films < self.min_cnt_movies].mean(0).to_dict() else: self.default_ = directors_stats.mean(0).to_dict() return self @@ -45,8 +43,14 @@ def fit(self, X, y): def transform(self, X): res = [] for e in X: - if e['director'] in self.directors_stats_lk_: - res.append(self.directors_stats_lk_[e['director']]) + if e[self.field] in self.directors_stats_lk_: + res.append(self.directors_stats_lk_[e[self.field]]) else: 
res.append(self.default_) - return res \ No newline at end of file + return res + + +# Para retrocompatibilidad del material en el curso +class DirectorFeatures(CrewFeatures): + def __init__(self, min_cnt_movies=2): + super().__init__(field='director', min_cnt_movies=min_cnt_movies) diff --git a/notebooks/clase-1/01_get_the_data.ipynb b/notebooks/clase-1/01_get_the_data.ipynb index 092ebb7..f2c96b6 100644 --- a/notebooks/clase-1/01_get_the_data.ipynb +++ b/notebooks/clase-1/01_get_the_data.ipynb @@ -87,38 +87,36 @@ "outputs": [], "source": [ "# descargamos los datos\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/aclImdb_v1.tar.gz -O $DATA_HOME/aclImdb_v1.tar.gz\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/movie_gross.csv -O $DATA_HOME/movie_gross.csv\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/name.basics.tsv.gz -O $DATA_HOME/name.basics.tsv.gz\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/title.akas.tsv.gz -O $DATA_HOME/title.akas.tsv.gz\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/title.basics.tsv.gz -O $DATA_HOME/title.basics.tsv.gz\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/title.crew.tsv.gz -O $DATA_HOME/title.crew.tsv.gz\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/title.principals.tsv.gz -O $DATA_HOME/title.principals.tsv.gz\n", - "!wget https://machine-learning-practico.s3.amazonaws.com/title.ratings.tsv.gz -O $DATA_HOME/title.ratings.tsv.gz" + "!wget https://datasets.imdbws.com/name.basics.tsv.gz -O $DATA_HOME/name.basics.tsv.gz\n", + "!wget https://datasets.imdbws.com/title.akas.tsv.gz -O $DATA_HOME/title.akas.tsv.gz\n", + "!wget https://datasets.imdbws.com/title.basics.tsv.gz -O $DATA_HOME/title.basics.tsv.gz\n", + "!wget https://datasets.imdbws.com/title.crew.tsv.gz -O $DATA_HOME/title.crew.tsv.gz\n", + "!wget https://datasets.imdbws.com/title.principals.tsv.gz -O $DATA_HOME/title.principals.tsv.gz\n", + "!wget 
https://datasets.imdbws.com/title.ratings.tsv.gz -O $DATA_HOME/title.ratings.tsv.gz" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Wl0qAm-hboiQ" - }, + "metadata": {}, "outputs": [], "source": [ - "# descomprimimos \n", - "!ls $DATA_HOME/*.gz | grep -v aclImdb_v1.tar.gz | xargs -I% gunzip \"%\"" + "# PARA DESCARGAR movie_gross.csv bajalo a mano de acá\n", + "# https://drive.google.com/file/d/1Aav7imwH7s1U2W3Olwgyd1tzUcYGUtcu/view?usp=sharing" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Wl0qAm-hboiQ" + }, "outputs": [], "source": [ - "# Opcional, no lo usamos en la materia, toma mucho tiempo en descomprimir\n", - "!tar -C $DATA_HOME -vxf $DATA_HOME/aclImdb_v1.tar.gz" + "# descomprimimos \n", + "!ls $DATA_HOME/*.gz | xargs -I% gunzip \"%\"" ] } ], diff --git a/notebooks/clase-3/01-rating-distribution.ipynb b/notebooks/clase-3/01-rating-distribution.ipynb new file mode 100644 index 0000000..37a57c4 --- /dev/null +++ b/notebooks/clase-3/01-rating-distribution.ipynb @@ -0,0 +1,461 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from google.colab import drive\n", + "\n", + "drive.mount('/content/gdrive')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llevamos el repositorio a nuestro drive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Esto solo debemos ejecutarlo una vez. 
Si lo ejecutamos mas de una vez va a fallar (pero no pasa nada!)\n", + "!mkdir /content/gdrive/My\\ Drive/ml-practico/code\n", + "# Acá podes usar tu propio repo si queres hacer modificaciones (highly recommended)\n", + "!git clone https://github.com/elsonidoq/machine_learning_practico /content/gdrive/My\\ Drive/ml-practico/code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Si hay cambios en el repositorio, con esta linea actualizas tu drive\n", + "!cd /content/gdrive/My\\ Drive/ml-practico/code; git checkout clase-3; git pull" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('/content/gdrive/My Drive/ml-practico/code')" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "# Para trabajar local\n", + "sys.path.append('/Users/przivic/prog/machine_learning_practico')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from lib import model, data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "# Para trabajar en colab\n", + "PATH = Path('/content/gdrive/My Drive/ml-practico/data/')\n", + "\n", + "# Para trabajar local\n", + "PATH = Path('../../data/')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading title basics...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/przivic/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3343: DtypeWarning: Columns (5) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading title ratings...\n", + "Loading movie directors...\n" + ] + } + ], + "source": [ + "movies = data.load_data(PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_df = movies[movies.startYear <= 2017]\n", + "test_df = movies[movies.startYear > 2017]\n", + "\n", + "len(train_df), len(test_df), len(test_df) / len(train_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_train = train_df.averageRating\n", + "y_test = test_df.averageRating\n", + "\n", + "X_train = train_df.to_dict(orient='records')\n", + "X_test = test_df.to_dict(orient='records')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modelling per se" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Armamos todos los problemas de clasificacion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "thresholds = np.linspace(1.5, 9.5, 15)\n", + "thresholds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# El punto medio entre cada par de threshold, util para graficar\n", + "mids = [(t0 + t1)/2 for t0, t1 in zip(thresholds[:-1], thresholds[1:])]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_bools(y):\n", + " res = []\n", + " for t in thresholds:\n", + " res.append(y <= t)\n", + " return res\n", + "\n", + "ys_train = get_bools(y_train)\n", + "ys_test = get_bools(y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Armamos los modelos" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "models = [\n", + " model.get_model_pipe(\n", + " features_pipe=model.get_features_pipe(\n", + " use_years=False, use_director=True, use_genre=True, post_processing=StandardScaler()\n", + " ),\n", + " model=LogisticRegression() \n", + " )\n", + " for _ in range(len(thresholds))\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Entrenamos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i, m in enumerate(models):\n", + " print(i)\n", + " m.fit(X_train, ys_train[i])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualizamos los modelos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "from random import randint" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbUAAAEICAYAAADY/mp2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd3gVZdr48e+dRiCEmgSBBEIPLYSQUKQpKGJDwQLY3VUsi2752dbd9fV1m7qurro2XnWxgwUVhbVRVHqoUhJ6SEJJQidA+vP7YybxcEgnJ3Nyzv25rlw558wzM/eZMzP3zDPPPCPGGJRSSilfEOB0AEoppVR90aSmlFLKZ2hSU0op5TM0qSmllPIZmtSUUkr5DE1qSimlfIYmtToSkcdF5N0qhm8WkQsaMKRaE5HFInJHJcM6iUieiARWMry6758uIhfVV6zeQERiRcSISFAlwx8VkdcbOq76JCIjRWSry/tKf0cRuUBEss5hXuNE5LO6jt9Q7N/8pIj81elY/IWILBSRfBFZYr9vJyKpItKkunGrTWpO7pxE5DYRKbF3rq5/HRo4jlpvvMaYvsaYxR6IpaJl8u/6no8xJsMY09wYU1Lf0/YEO0EfqclKX4tp1mrdN8b8zRhT4UFCPcRStmPNE5G9IvJsZQcc58IY86Mxpld9T7cSfwOeBBCRKBH5QET2icgxEVkqIkNcC4vIDSKyx14On4lIG5dh14vIMhE5JSKL3WckImNEZK2IHBeRXSIyrZaxDjDG/MGeVqyIpLtMO11ETovICRE5asdxt4j4zElDdQex9TD9M5apMWYMcLfL+2xgEVDt73ZOC72yI9Z6ttzeubr+7avPGTTQ96hP7stkem1GFosvbXCxwEjAABMcDcazBhhjmgNjgRuAOx2Op85EJBloaYxZYX/UHEgBBgFtgLeAeSLS3C7fF3gNuBloB5wCXnaZ5GHgX9hJ0m1ewcCn9vgtgcnAsyIyoB6/0pXGmHCgsx3Dw8Ab9Tj9Rq2e9rHvAXdVV6hWOzb7LGGpiDwnIoeBx0Wkm32qeEhEDorIeyLSymWcdBF5QER+so/AZotIaO2/z1mxPCIiH7t99ryIvGC/bikib4jIfvvI9i9lR7YVfI8/i8hhEenvMq0o++irM/BfoEMFZ4ohIvK2fYS2WUSS3L73Rfbrx0XkwyrKJorIOnvYR/Yy+ksdlsn5IpJiL+cUETnfZdhiEfmriCzF2iF0tQd1E5FV9jiflx39iltVm4h0EZHv7Ri/BSLc5n2zfRR9SET+4DYswP69dtrDP6xgPreKSIa9Dp0xfg3cAqwAZgK3us17poi8JCLz7NhXikg3e9hLIvJPt/JfiMhvROQdoBPwhf2bP+RS7MaKYnU9mq3ue4lIUxF5S6yzy1QReUhqWBtgjEkDfgT62dO6U0R22Ovw3LL1UyzPiUiO/fv+JCJl41wmIlvsZbJXRB6wP6+oViLZLntERP4jlWy/ItJBRD4RkVwR2S0i91fxNS4Fvnf5TruMMc8aY/YbY0qMMTOAEKDsrPFG4AtjzA/GmDzgT8AkEQm3x//OGPMhUNEBbxugBfCOsaQAqUCfKuKrE2PMMWPMXKzEeavL8m5pb/+59nbyR3E5sLR/w1T799giIon250ZEuruUm1m2byj7rex1J0esfd3V9m+7zV4fHnUZt07boYiMBx4FJtvbwgb789tdYt4lIne5zKsstodF5ADwHxHZJCJXupQJtueTUMPFuxLoKtY+ucofoco/IB24yH59G1AM3AcEAU2B7sDFQBMgEvgB+Jfb+KuADlgrVypwt8vwo8CISuZ9G7CkkmGdsXbOLez3gcB+YKj9/jOsI
7MwIMqO4a4qvsfLwFMu0/811kYEcAGQ5Tb/x4F84DJ73n8HVlSy3Coti7Xh7rHnFwxMAgqBv9RmmdjL9gjWkWwQMNV+39YevhjIAPraw4Ptz/Zi7RzDgE+Ad+3ysVhnPkH2++XAs/bvPAo44VK2D5Bnf97ELlfs8v1/g5V0ou3hrwEfuM3n/+zfYQBQAPSubt10+e47gHuxjvKLgHYuw2ZiHcUPtr/3e8Ase9hgrJ1ggP0+Amudauf+G9YkVvt3dl9+lZV9Emun3tpeLj/hto65fUcDdHdZ3geAXwJjgINAor1sXwR+sMtdAqwBWgEC9Aba28P2AyPt162BxIrWdXsZbAJisNaxpdjrpmtZrAPkNcBjWOt0V2AXcEkl3+cj4MEqvm8C1jbT0n7/OfCwW5k8YJDbZ3cAiyuY3vvAr7C2v2FADhBTw/WrfNlXt490+zwDuMd+/bb9HcLtdWMb8Et72HVY22Gy/Tt1BzpXNG+s9dl1+RfbyzwY68w91/6u4Vjbej7Q9Vy3Q1zWbZdYLge62TGPxtp2Et1ie8qeV1PgIWC2y/hXARurWK634bavw9pOJlT5e9XgBy3/weyZZFRT/mpgndv4N7m8fxp4tYYr0232gjnq8rfTZfgS4Bb79cVlw7CqJwqApi5lpwKLKvsewBAgk593cKuB6yva0F1+5O9c3vcBTley3Coti5UI9gLi9r2qSmruy2QoVjJb5VZ2OXCb/Xox8ITb8MXAk25xFWJt+LHYSQ3rjKUYCHPbSZTtwB/DThT2+zB7OmXfPxUY6zK8PVbyCXKZT7TL8FXAlBquIyPsaUXY79OA37rtBF53eX8ZkObyPhW42H49HZhf0W/otuFXGCsVJ7XKyp6xw8faGVeX1I5jHajsBP6ClUjeAJ52KdfcXh6xWAlvm71+BLhNLwOrKqeF2+cXcHZScz0IvYyft7Pysljbj/s29XvgP5V8n29dp+s2rAWwEfi9y2cL3MtjbTcXuH1WWVK7EsjGWo+LgTtrsn65LPu6JLUVwB+wtqcCoI/LsLvK4gS+Bn5dk3lzdlI7DQTa78Pt8kNcyq8Brj7X7ZAKkloFsX5W9j3s2AqBUJfhHbAOhstORD4GHqpierdxdlJbir3Pr+yvLtdVMl3fiFVNN8uuwjgOvItb1RTWUWWZU1gbXk2tMMa0cvnr5jLsfaxkBdY1hvft152xjlz2i3Xh9ijWUUlUZd/DGLMSOAmMFpE4rKOludXE5v69QqXyuuPKynYA9hr7F6sotgq4L5MV9nT2uJXbA3SsZrqun+3BWm7uv18H4Igx5qRbWdfh5dOxyx1yGd4Z+NTlt0gFSrAOPsrUdR25FfjGGHPQfv8+blWQ1Uz7LeAm+/VNwDs1mGdtYq2s7BnLjOp/c7COglsbY7oZY/5ojCnF7Xc3VtXcIaCjMWYh8G/gJSBbRGaISAu76DVYCWqPWNXKw6qYr/s6UlFDrc5YVfRHXX7nRznzN3Z1BGsnfAYRaQp8gbWO/91lUB5WsnPVAmsnWSV7e56NVU0dgnUG85CIXF7duOeoI1YtQQQ/18iUcd02Y7AOVOrikPm5Mddp+3+2y/DT/LzO1et2KCKXisgKu5rzKNb65LrvyDXG5Je9MVZbiKXANWJdoroUq+akNsKxDuQrVZekZtze/93+LN4Y0wJrxyB1mG5dfARcICLRwER+TmqZWEdGES47/hbGmL4u47p/D/h5B3cz8LHLD1JR2fqyH+goIq7LLKYO09mHtdK66oR1NFumou/hOq9OWEduB93K7Adai0iYW1nX4eXTEZFmQFuX4ZnApW6JONQY4xpbrdk7wOuxDkQO2HX3vwUGSM0bAbwLXGWX7411tFnG0797tMv7uvzm4Pa7279RW+zf3RjzgjFmENaOvCfwoP15ijHmKqwDvc+AD6uYh/s6UtF1q0xgt9tvHG6MuaySaf5kx1NOrJarn9mxuzcI2IxVJVZWtitWtda2KuIu0w/Yaoz52hhTa
ozZCszD2ql6hFgNYTpi1bocxNquXLdP120zE6saryKngGYu7887h7DOZTs8Y1uwf6tPgGewqutbAfM5c99f1T72OqwGbzXeB9gnAd2BDVWVq48WcOFYR1FHRaQj9kbTEIwxuVhVaP/B2qBS7c/3A98A/xSRFvYF0m4iMrqaSb6DlRxvwqoDL5MNtBWRlvX9HbCqCEuA6SISJCJXYV3rqa35QE+xmj0HichkrOrEL6sZ7yYR6WMnoiewkvkZzfiNMXuwqmP/V0RCRGQEVnVOmY+BK0RkhIiE2NNxXbdeBf5adoFXRCLt71kt+4JzZcnlaqxl1wfrGkwCVmL6EeuovFrGmCysVnfvAJ8YY067DM7m5wY19e1D4Pci0trebmrVgtXF+8DtIpJg72j+Bqw0xqSLSLKIDBGr9d9JrOsrJfZveKOItDTGFGFVa1Z168avRCTablTwKNZZj7tVwHG7YUBTEQkUkX72zr0i87GuwwDlLRQ/xjqzuMU+C3X1HnClWPfRhWGtY3OMMSfs8QPFasASBASISKg9TYB1QA+xmvWLWA2FrsDeOVazjtWKvb+5ApiFVV230d6ePsTaBsLt7eB3WAdUAK8DD4jIIDu+7i6NIdYDN9jfb7zrMquDOm+HWNtCrPzcuCUE66AiFygWkUuBcTWYzmdY139/zZn72JoYDKTb+6NK1UdS+1+sII9hHf3Mqc3IYrWmGVlFkWFy9n1qrhvK+8BF/HyWVqasqmELVlXHx1h1yJWyd3BrsY4wfnT5PA34ANhln7rX231yxphCrMYhv8Q6rb4JKxEV1HI6h7A21P+HVf30EHCFS7VcZd7Bqqc/AIQClbVYuwHruslh4H9wWSGNMZuxLsK/j3UGcgRwbUH3PFZV7jcicgLrWsMZ9yBVIQYr8VfkVqxrNhnGmANlf1hVbjdWURXs7i2gP2dXPf4d+KP9mz9Qw2nV1BNYy2g38B3W+lmr3xzAGLMAqyXgJ1jLvhswxR7cAuvC/xGs6q5DWEfWYNVGpIt1yeBufq6Crcj7WAeJu+y/s1rm2jvuK7EOLHZjnZ28jtWEvqK41wLH5Od70c7HWn/HYR0g57nuG+x17G6s5JaDdTB9r8skb8ZKiK9g3d5x2v7uGGN2Ar8AXsBK4N/by6usyX1V61hNfWGv25lY19GeBW53GX4f1oHFLqyzt/eBN+34PgL+an92AmvHX3YP3q+xlutRrBag53Kz+rlshx/Z/w+JyFr7YOJ+rGR9BGv/UN3lGuyDxk+ALtQyV2B9/1erKyRnXspRIvImsM8Y80cHY1iJ1ZjmP07F4C3E6qHjI2PM1x6cxyiso+bYCs4QGoSI3IN1Uf5cjsQbFREZB9xrjLna4TiqXMdEJB/rgOMFY8yfGjQ4HyQijwE9jTGVHkiJddvQUKzGb2NFJArrYGSg63W6CsfVpPYzsW7iXY+14HY34HxHA1uxjm7Ljka62tWoyoPsKqpZwAZjzBMNON/2WFWby4EeWLUc/zbG/KuhYlCqodlV2OuAm40xP3hiHj7Tq8S5EpE/Y92P84+GTGi2Xlj1+8ewqg+v1YTmeSLSG6tapz1WbxQNKQSrRe4JYCHWPUwvVzmGUo2YiNyJVT37X08lNNAzNaWUUj5Ez9SUUkr5jMbWkW+VIiIiTGxsrNNhKKVUo7JmzZqDxphIp+OoDz6V1GJjY1m9erXTYSilVKMiIlXe+9WYaPWjUkopn6FJTSmllM/QpKaUUspn+NQ1tYoUFRWRlZVFfn6VN6ErHxcaGkp0dDTBwcHVF1ZKNVo+n9SysrIIDw8nNjYWkYZ6eIDyJsYYDh06RFZWFl26dHE6HKWUB/l89WN+fj5t27bVhObHRIS2bdvq2bpSfsDnkxqgCU3pOqCUn/D56kelvMJ/H7H+X/qks3F40FOrnqL3OytpKb0Y/txfCQr0i2Nm5WV0rfOwo0eP8vLLzvdTm56ezvvv//zIudWrV3P//ZU9Ok3VuwMbrT8flnY4D
bZlcGDNT8zfdMDpcJSf0qTmYVUltZKSqh42XHvFxcWVDnNPaklJSbzwwgv1On+likqsx9HNXNrQD7pQyqJJzcMeeeQRdu7cSUJCAg8++CCLFy/mwgsv5IYbbqB///6kp6fTr1+/8vLPPPMMjz/+OAA7d+5k/PjxDBo0iJEjR5KWlnbW9B9//HGmTZvGuHHjuOWWW0hPT2fkyJEkJiaSmJjIsmXLyuP48ccfSUhI4LnnnmPx4sVcccUV5dP4xS9+wQUXXEDXrl3PSHZ//vOfiYuL4+KLL2bq1Kk888wzZ8WgFMCx00WUGghvGszajKP8lHXU6ZCUH/Kva2r/faT+q4DO61/ldZInn3ySTZs2sX79egAWL17MqlWr2LRpE126dCE9Pb3ScadNm8arr75Kjx49WLlyJffeey8LFy48q9yaNWtYsmQJTZs25dSpU3z77beEhoayfft2pk6dyurVq3nyySd55pln+PLLL8vjcJWWlsaiRYs4ceIEvXr14p577mHDhg188sknrFu3juLiYhITExk0aFDtl5HyCzknChCBnlHNCQsJZOaydJ69PsHpsJSf8XhSE5HxwPNAIPC6MeZJt+E3Ag/bb/OAe4wxG+xh6VgPUSwBio0xSZ6OtyEMHjy42vul8vLyWLZsGdddd135ZwUFBRWWnTBhAk2bNgWsm82nT5/O+vXrCQwMZNu2bTWK6fLLL6dJkyY0adKEqKgosrOzWbJkCVdddVX5tK+88soaTUv5n8MnCzlyspDgwACCAwO4dlA0H6zK5PeX9iYyvInT4Sk/4tGkJiKBwEvAxUAWkCIic40xW1yK7QZGG2OOiMilwAxgiMvwC40xB+slIC9peRYWFlb+OigoiNLS0vL3ZfdSlZaW0qpVq/IzvJpO77nnnqNdu3Zs2LCB0tJSQkNDaxRTkyY/73gCAwMpLi5GHyCramrO2ixKjSHEbvF4y/mxvLV8Dx+syuD+sT0cjk75E09fUxsM7DDG7DLGFAKzgKtcCxhjlhljjthvVwDRHo6pQYWHh3PixIlKh7dr146cnBwOHTpEQUFBefVgixYt6NKlCx999BFg9YqxYcOGaud37Ngx2rdvT0BAAO+88055Y5Tq4qjIiBEj+OKLL8jPzycvL4958+bVanzlH4wxzE7JpHloEAH27YDdIpszumck767YQ2FxadUTUKoeeTqpdQQyXd5n2Z9V5pfAf13eG+AbEVkjItMqGkFEponIahFZnZube84B17e2bdsyfPhw+vXrx4MPPnjW8ODgYB577DGGDBnCFVdcQVxcXPmw9957jzfeeIMBAwbQt29fPv/882rnd++99/LWW28xdOhQtm3bVn4WFx8fT1BQEAMGDOC5556rUezJyclMmDCBAQMGMGnSJJKSkmjZsmUNv7nyF2szjrI9J4+o8DNrBW4bHkvOiQL+u2m/Q5EpfySerGISkeuAS4wxd9jvbwYGG2Puq6DshcDLwAhjzCH7sw7GmH0iEgV8C9xnjPmhsvklJSUZ94eEpqam0rt373r7Tv4mLy+P5s2bc+rUKUaNGsWMGTNITEx0Oqw6cXRd+M/l1v/bfe9s96GPN/DlT/tJGjqLG1/aQlybODq/8zalpYaxz35P62bBzLl3uNNhqiqIyBpfabPg6TO1LCDG5X00sM+9kIjEA68DV5UlNABjzD77fw7wKVZ1pmpA06ZNIyEhgcTERK655ppGm9CUZ5zIL+KLDfu5Mr4DgQFndkUWECDcMqwzazOOsiFTm/erhuHppJYC9BCRLiISAkwB5roWEJFOwBzgZmPMNpfPw0QkvOw1MA7Y5OF4lZv333+f9evXk5aWxu9//3unw1Fe5suf9nO6qITJg2MqHH7toGjCQgJ5a1l6wwam/JZHk5oxphiYDnwNpAIfGmM2i8jdInK3XewxoC3wsoisF5Gy+sN2wBIR2QCsAuYZY77yZLxKqdqZlZJJr3bhDIxpVeHw8NBgrkuK4Yuf9pFzQp+So
DzP4/epGWPmA/PdPnvV5fUdwB0VjLcLGODp+JRSdZO6/zgbMo/y2BV9qnwKwi3DOjNzWTofrMzk1xdp837lWdpNllKqTmanZBISGMDEgVU1aIaudvP+91Zq837leZrUlFK1ll9Uwqfr9nJJv/NoHRZSbXlt3q8aiiY15VGuHSfPnTuXJ5+svFcX9yca7Nu3j2uvvdbjMara+3rzAY6dLmJKcsUNRNyN7hFJl4gwZmqDEeVhmtQaqfp+bE1DzH/ChAk88sgjlQ53T2odOnTg448/rlN8yrNmp2QS06Ypw7q2rVH5gADh1mGdWZdxlPXavF95kCa1BnD11VczaNAg+vbty4wZMwB45ZVXeOihh8rLzJw5k/vus+5Jf/fddxk8eDAJCQncdddd5QmkefPm5b2PLF++nCeeeILk5GT69evHtGnTyvtqTElJIT4+nmHDhvHggw+WP9qmpKSEBx98kOTkZOLj43nttdfOijU9PZ24uDhuvfVW4uPjufbaazl16hQAsbGxPPHEE4wYMYKPPvqIb775hmHDhpGYmMh1111HXl4eAF999RVxcXGMGDGCOXPmnPEdp0+fDkB2djYTJ05kwIABDBgwgGXLlp31mB7Xx/Lk5+dz++23079/fwYOHMiiRYvKpzlp0iTGjx9Pjx49zlimyjP2HDrJsp2HmJwUQ0BA5Q1E3F0zKJrmTYK0eb/yKL969MxTq56yns5bj+LaxPHw4IerLPPmm2/Spk0bTp8+TXJyMtdccw3XXnstw4YN4+mnnwZg9uzZ/OEPfyA1NZXZs2ezdOlSgoODuffee3nvvfe45ZZbOHnyJP369eOJJ54AoE+fPjz22GMA3HzzzXz55ZdceeWV3H777cyYMYPzzz//jDOjN954g5YtW5KSkkJBQQHDhw9n3LhxZz0xYOvWrbzxxhsMHz6cX/ziF7z88ss88MADAISGhrJkyRIOHjzIpEmT+O677wgLC+Opp57i2Wef5aGHHuLOO+9k4cKFdO/encmTJ1e4TO6//35Gjx7Np59+SklJCXl5eWc9psf1sTwvvfQSABs3biQtLY1x48aVP4Fg/fr1rFu3jiZNmtCrVy/uu+8+YmJqVi2mau/D1ZkECFw7qHbLODw0mGsHRfPeyj38/rK4s7rVUqo+6JlaA3jhhRcYMGAAQ4cOJTMzk+3btxMZGUnXrl1ZsWIFhw4dYuvWrQwfPpwFCxawZs0akpOTSUhIYMGCBezatQuwes+/5ppryqe7aNEihgwZQv/+/Vm4cCGbN2/m6NGjnDhxgvPPPx+AG264obz8N998w9tvv01CQgJDhgzh0KFDbN++/ax4Y2JiGD7c6tbopptuYsmSJeXDypLUihUr2LJlC8OHDychIYG33nqLPXv2kJaWRpcuXejRowciwk033VThMlm4cCH33HNP+feqrk/JJUuWcPPNNwMQFxdH586dy5Pa2LFjadmyJaGhofTp04c9e/ZUOS1Vd8UlpXy0OosLe0VxXsvaJ6VbhnWmqMTwwcrM6gsrVQd+daZW3RmVJyxevJjvvvuO5cuX06xZMy644ILyx8tMnjyZDz/8kLi4OCZOnIiIYIzh1ltv5e9///tZ0woNDSUwMBCwquPuvfdeVq9eTUxMDI8//jj5+flVPi7GGMOLL77IJZdcUmXM7vccub4v6yDZGMPFF1/MBx98cEbZ9evXV3nPUl1V9b0qemyO8ozFW3PJOVHA5Bo2EHHXNbI5F/SK5N2Ve7jngm6EBOlxtapfukZ52LFjx2jdujXNmjUjLS2NFStWlA+bNGkSn332GR988EH5GdDYsWP5+OOPycnJAeDw4cMVnnmUJcaIiAjy8vLKG1S0bt2a8PDw8vnMmjWrfJxLLrmEV155haKiIgC2bdvGyZMnz5p2RkYGy5cvB+CDDz5gxIgRZ5UZOnQoS5cuZceOHQCcOnWKbdu2ERcXx+7du9m5c2f5+BUZO3Ysr7zyCmBd6zt+/HiVj8cZNWoU7
733XnncGRkZ9OrVq8KyynNmpWQSGd6EC+Oi6jyN286PJVeb9ysP0aTmYePHj6e4uJj4+Hj+9Kc/MXTo0PJhrVu3Lq8uGzzY6qu5T58+/OUvf2HcuHHEx8dz8cUXs3//2Rt/q1atuPPOO+nfvz9XX301ycnJ5cPeeOMNpk2bxrBhwzDGlFft3XHHHfTp04fExET69evHXXfdVeFZTe/evXnrrbeIj4/n8OHD5dWEriIjI5k5cyZTp04lPj6eoUOHkpaWRmhoKDNmzODyyy9nxIgRdO7cucLl8vzzz7No0SL69+/PoEGD2Lx5c5WP6bn33nspKSmhf//+TJ48mZkzZ55xhqY8L/t4Pou25nDtoGiCA+u+6xilzfuVB3n00TMNTR89Yyl7XAzAk08+yf79+3n++edrNG56ejpXXHEFmzb5Xt/R+uiZc/PSoh384+utLHrgArpEhJ01/PavbmfKi5vLHz1TlZlLd/P4F1v47FfDSaik30jVcPTRM8qrzZs3j4SEBPr168ePP/7IH//4R6dDUo1caan1dOuhXdtUmNBqS5v3K0/RpOaDJk+ezPr169m0aRPz5s0jMjKyxuPGxsb65FmaOjcrdh0i4/AppiR3qpfplTXv/1J771f1TJOaUqpas1IyaREaxPh+59XbNG89P5aiEsP7KzPqbZpKaVJTSlXpyMlCvtp0gIkDOxIaHFhv0+0SEcYFvSJ5b2WG9t6v6o0mNaVUlT5bv5fCklIm11PVoytt3q/qmyY1pVSljDHMWpVJfHRL+nRoUe/TH9Ujkq4RYfxnaXq9T1v5J01qjYwvPsrltttu0974vdSGrGNszT5R5x5EqhMQINx6fizrM4+yLuOIR+ah/IsmNS+hj3JR3mh2SgZNgwOZMKCDx+ahzftVfdKk5mG++iiX2NhYHn30UYYNG0ZSUhJr167lkksuoVu3brz66quAVXVV9uib/v37M3v27PLPp0+fTp8+fbj88svLuwQDWLNmDaNHj2bQoEFccsklFfamohrGyYJi5q7fx+Xx7QkPDfbYfJo3CeLaQdHM27hfm/erc+ZXHRof+NvfKEit30fPNOkdx3mPPlplGV99lEtMTAzLly/nt7/9LbfddhtLly4lPz+fvn37cvfddzNnzhzWr1/Phg0bOHjwIMnJyYwaNYrly5ezdetWNm7cSHZ2Nn369OEXv/gFRUVF3HfffXz++edERkaWP47nzTffrPXvos7dvJ/2c7KwpMZPt5e7BfAAACAASURBVD4Xt54fy8xl6by/MoPfXNTT4/NTvkvP1BqArz7KZcKECQD079+fIUOGEB4eTmRkJKGhoRw9epQlS5YwdepUAgMDadeuHaNHjyYlJYUffvih/PMOHTowZswYwEr+mzZt4uKLLyYhIYG//OUvZGVl1WgZq/o3KyWDbpFhDOrc2uPz6hIRxoW9Inl3hTbvV+fGr87Uqjuj8hRffZRLWbmAgIAzxgkICKC4uLjKeVT0nYwx9O3bt/wJAco527JPsDbjKH+4rLdH1r+K3Da8C7e+uYr5G/dz9cCODTJP5Xv0TK0B+OujXEaNGsXs2bMpKSkhNzeXH374gcGDBzNq1ChmzZpFSUkJ+/fvL7+e16tXL3Jzc8uXVVFREZs3b67XmFTNzE7JJDhQmJjYcMllZPcIq3m/NhhR50CTWgPw10e5TJw4kfj4eAYMGMCYMWN4+umnOe+885g4cSI9evSgf//+3HPPPYwePRqAkJAQPv74Yx5++GEGDBhAQkICy5Ytq9eYVPUKikuYszaLi/u0I6J5wz3ep6x5/wZt3q/OgT56xsN8+VEujY0+eqZmvvxpH9PfX8dbvxjM6J417wy7No+eqUxeQTFD/7aAi3pH8a8pA+s0DVV7+ugZpZTPmp2SScdWTRnRPaLB5928SRDXJdnN+49r835Ve5rUPEwf5aIak8zDp/hx+0GuS4omMKBhGoi4u2WY1Xv/e9p7v6oDv0hqvlTFqupG14Ga+
Wh1JiJwXZLn702rTFnzfu29X9WFx5OaiIwXka0iskNEzurTSURuFJGf7L9lIjKgpuPWRGhoKIcOHdKdmh8zxnDo0CFCQ0OdDsWrlZQaPlydxeiekXRs1dTRWG4b3oWDeQXM36g9yqja8eh9aiISCLwEXAxkASkiMtcYs8Wl2G5gtDHmiIhcCswAhtRw3GpFR0eTlZVFbm5ufXwl1UiFhoYSHR3tdBhe7YdtuRw4ns/jE/o4HYrVvD/Sat6v96yp2vD0zdeDgR3GmF0AIjILuAooT0zGGNc22yuA6JqOWxPBwcF06dKlzl9AKX8xKyWDiOYhjIlr53QoBAQIt50fy2Ofb2ZdxhEGdvJ8rybKN3i6+rEjkOnyPsv+rDK/BP5bx3GVUnWUcyKfBak5XJMYTUiQd1xqn5Ro9d4/U2/GVrXg6bW3ouZTFV7cEpELsZLaw7UZV0SmichqEVmtVYxK1c2ctXspLjVc3wCdF9dUWfP++dq8X9WCp5NaFuC6lUQD+9wLiUg88DpwlTHmUG3GNcbMMMYkGWOSIiNrfqOoUspijGF2SiaDY9vQLbK50+Gc4dZhsRSXavN+VXOeTmopQA8R6SIiIcAUYK5rARHpBMwBbjbGbKvNuEqpc7dq92F2Hzzpsadbn4vYiDAu7BXFeyszKCiu/YN0lf/xaFIzxhQD04GvgVTgQ2PMZhG5W0Tutos9BrQFXhaR9SKyuqpxPRmvUv5odkom4U2CuKx/e6dDqdCt58dq835VYx5/9IwxZj4w3+2zV11e3wHcUdNxlVL159jpIuZt3M91SdE0DQl0OpwKlTXvn7k0nYkD9bYMVTXvaOaklHLE3PV7KSguZUpyJ6dDqVRZ8/4NWce0935VLU1qSvmxWSmZ9O3Qgn4dq35SutMmJUYTrs37VQ1oUlPKT23ae4zN+44zxQsbiLizmvfHMO+n/WRr835VBU1qSvmpWSkZNAkKYEJC4+jT4JZhnSkxhreXpzsdivJimtSU8kOnC0v4fN0+Lu/fnpZNg50Op0ZiI8K4rH97/rM0ndwTBU6Ho7yUJjWl/ND8jfs5UVDslfemVeWBcb0oLC7lxYXbnQ5FeSlNakr5odkpmXSJCGNwlzZOh1IrXSLCmDI4hvdXZpB+8KTT4SgvpElNKT+zMzePVemHmZwcg4gzT7c+F/eP7UFwYADPfLPV6VCUF9KkppSf+TAlk6AAYVJi42gg4i4qPJQ7R3bhy5/281PWUafDUV5Gk5pSfsQYw5c/7Wd0z0iiwhvvk8DvHNWVNmEhPPnfNH2qvTqDJjWl/MjW7BPsPXqai/s4/yDQcxEeGsx9Y7qzbOchftx+0OlwlBfRpKaUH1mQmgPAmLgohyM5dzcM6URMm6Y8+d80Skv1bE1ZNKkp5UcWpGYTH92SqBaNt+qxTJOgQB4Y14st+4/zxU9nPWpR+SlNakr5iYN5BazLPMrYuMZd9ejqyvgO9Gnfgn98vVWft6YATWpK+Y3FW3MxBsb2bvxVj2UCAoRHLo0j68hp3tenYys0qSnlNxakZnNei1D6dmjhdCj1amSPCIZ3b8uLC3dwIr/I6XCUwzSpKeUHCopL+GFbLmN6RzXKG66rIiI8PD6OwycL+b8fdjkdjnKYJjWl/MCq3Yc5WVjCWB9o9ViR+OhWXBHfnv/7cTc5J/TRNP5Mk5pSfmBBag6hwQEM7x7hdCge88C4XhSVlPLCAu3s2J9pUlPKxxljWJCWzfBuEYQGBzodjsfERoQxdXAnPliVya7cPKfDUQ7RpKaUj9uek0fm4dOM7e07Tfkrc9/Y7jQJCuCf32xzOhTlEE1qSvk4X+pFpDpR4aHcMbIr8zbuZ0OmdnbsjzSpKeXjFqRm069jC85r2fh7EamJO0d2oa12duy3NKkp5cMOnyxkbcYRxvhQLyLVKevsePmuQ/ygnR37HU1qSvmwxVtzKDVwkQ/1IlITNwzprJ0d+ylNakr5sAWpOUSFN6Ffh5ZOh9KgQoICe
GBcL1L3H2fuBu3s2J9oUlPKRxUWl1q9iMRFERDgW72I1MSV8R3o26EFz3yjnR37E01qSvmolPTDnCgo9oum/BVx7ez4vRXa2bG/0KSmlI9akJpDSFAAw7u3dToUx4zsEcmI7hG8uHA7x7WzY7/g8aQmIuNFZKuI7BCRRyoYHiciy0WkQEQecBuWLiIbRWS9iKz2dKxK+YqfexFpS7OQIKfDcdTD4+M4cqpIOzv2Ex5NaiISCLwEXAr0AaaKSB+3YoeB+4FnKpnMhcaYBGNMkuciVcq37Mw9yZ5Dpxjjp1WPrvpHt+TKAR14/cfd5BzXzo59nafP1AYDO4wxu4wxhcAs4CrXAsaYHGNMCqB1A0rVkwWp2QA+2yt/bT0wridFJaU8r50d+zxPJ7WOQKbL+yz7s5oywDciskZEptVrZEr5sAVpOfRu34IOrZo6HYpX6Nw2jBuHdGJWinZ27Os8ndQqakdcmzshhxtjErGqL38lIqPOmoHINBFZLSKrc3Nz6xqnUj7j6KlC1uw54nc3XFdn+pgeNAkK4JlvtjodivIgTye1LCDG5X00UOM7IY0x++z/OcCnWNWZ7mVmGGOSjDFJkZGR5xiuUo3f99tyKSk1ftGBcW1EhjfhzpFdmb/xAOsyjjgdjvIQTye1FKCHiHQRkRBgCjC3JiOKSJiIhJe9BsYBmzwWqVI+4rvUHCKaN2FAdCunQ/E6d47qqp0d+ziPJjVjTDEwHfgaSAU+NMZsFpG7ReRuABE5T0SygN8BfxSRLBFpAbQDlojIBmAVMM8Y85Un41WqsSsqKWXx1hzGxEX6ZS8i1WneJIj7x/Zg5e7DLN6mlyt8kcdvYDHGzAfmu332qsvrA1jVku6OAwM8G51SvmV1+hFO5Bf7Va/8tTV1cCfeWLKbp/6bxqgekQRq8vcp2qOIUj5kQWo2IYEBjOwR4XQoXiskKIAHLulF2oETfL5+r9PhqHqmSU0pH7IwLYeh3doS1sS/exGpzhX929OvYwv++c028ou0s2NfoklNKR+xKzePXQdPalP+GggIEB4Z35u9R0/z7oo9Toej6pEmNaV8xMK0HABtyl9DI3pEMLJHBP9etEM7O/YhmtSU8hHfpWYTd1440a2bOR1Ko/Hw+DiOnipixvfa2bGv0KSmlA84drqIlPQjepZWS/06tmTCgA68vmSXdnbsIzSpKeUDynoR8dcHgp6LB8b1oqTU8C/t7NgnaFJTygcsSM2mTVgICTHai0htdWrbjBuHdGZ2SiY7tbPjRk+TmlKNXHFJKYu35nJhryi9kbiOpo/pTmhQAP/4Sjs7buw0qSnVyK3Zc4Rjp4u0Kf85iGjehF+O6MJXmw+w59BJp8NR50CTmlKN3MK0HIIDhRHai8g5uWFIZwIEPlydWX1h5bU0qSnVyH2Xms3Qrm0JDw12OpRG7byWoVzYK4qPVmdRXFLqdDiqjjSpKdWIpR88yc7ck9qUv55MGdyJnBMFLNqqPfg3VprUlGrEFti9iIzVXvnrxYW9IokKb8LslAynQ1F1pElNqUZsYVo2PaKa06mt9iJSH4ICA7h2UDQL03I4cExvxm6MNKkp1Ugdzy9i5a7DesN1Pbs+KYZSA5+szXI6FFUHmtSUaqR+2JZLcalhrDblr1exEWEM69qW2SmZlJYap8NRtaRJTalGamFqDq2bBZPYqbXToficKYNjyDh8ihW7DjkdiqolTWpKNUIlpYZFW3O0FxEPuaTvebRsGsysFL1nrbHRpKZUI7Qu4whHThUxRqsePSI0OJCJAzvy1aYDHDlZ6HQ4qhY0qSnVCH2XmkNQgDCqZ6TTofisyckxFJaU8tn6vU6HompBk5pSjdDCtGwGd2lDC+1FxGN6t2/BgOiWzFqViTHaYKSx0KSmVCOTefgU27LztCl/A5ic3Imt2SfYkHXM6VBUDWlSU6qRWZCaDcBY7RrL464c0J6mwYHaw0gjoklNqUZmQVoO3SLDiI0IczoUnxceGswV8
e2Zu34fJwuKnQ5H1YAmNaUakbyCYlbsOqRVjw1oyuAYThaW8OVP+5wORdWAJjWlGpEft+VSVGK06rEBJXZqTY+o5nrPWiOhSU2pRuS71BxaNg1mUGftRaShiAiTk2NYl3GUrQdOOB2OqoYmNaUaiZJSw+KtOVzQK5KgQN10G9KkxGiCA4XZerbm9XTLUKqRWJ95lEMnC/V6mgPahIUwru95zFmXRUFxidPhqCp4PKmJyHgR2SoiO0TkkQqGx4nIchEpEJEHajOuUv5kYVo2gQHC6B7ai4gTpiTHcPRUEd9sznY6FFUFjyY1EQkEXgIuBfoAU0Wkj1uxw8D9wDN1GFcpv7EgNYfk2Na0bKa9iDhheLcIOrZqqlWQXs7TZ2qDgR3GmF3GmEJgFnCVawFjTI4xJgUoqu24SvmLrCOnSDtwgrFxWvXolIAAq8HIkh0HyTx8yulwVCU8ndQ6Aq6HNVn2Z/U2rohME5HVIrI6Nze3zoEq5c0WpuUA6ANBHXbtoGgCBD5crWdr3srTSa2iBz3VtGfQGo1rjJlhjEkyxiRFRuq1BuWbFqTm0CUijK6RzZ0Oxa91aNWU0T0j+Wh1FsUlpU6Hoyrg6aSWBcS4vI8Ganpb/rmMq5TPOFlQzPKdh/SGay8xObkTB47n88N2rRnyRp5OailADxHpIiIhwBRgbgOMq5TP+HH7QQpLSvWBoF5ibO8oIpqHMGuVVkF6oyBPTtwYUywi04GvgUDgTWPMZhG52x7+qoicB6wGWgClIvIboI8x5nhF43oyXqW80cK0bMJDg0iObeN0KAoIDgzgmkHRvP7jbnKO5xPVItTpkJQLjyY1AGPMfGC+22evurw+gFW1WKNxlfInpaWGhWm5jO4ZSbD2IuI1JifF8Nr3u/h4bRb3XtDd6XCUC91KlPJiP+09xsG8Ai7SXkS8StfI5gzu0obZKfpUbG+jSU0pL7YgNZsAgQt6actebzMlOYY9h06xYtdhp0NRLjSpKeXFFqTmkNS5Da2ahTgdinJzWf/2hIcG6VOxvYwmNaW81L6jp9my/7jecO2lQoMDmTiwI/M3HeDYKfcOkZRTNKkp5aW0FxHvNzk5hsLiUj5bv9fpUJRNk5pSXmpBajad2zajm/Yi4rX6dmhJ/44t+WBVhjYY8RKa1JTyQqcKi1m68xBj4qIQqajHOOUtJifHkHbgBBv3HnM6FIUmNaW80tIdhygsLtWm/I3AhIQOhAYHMEsfSeMVNKkp5YUWpGYT3kR7EWkMWoQGc3n/Dsxdv49ThcVOh+P3NKkp5WWsXkRyGNUzkpAg3UQbgymDY8grKGbeT/udDsXv6RajlJd5fsF2ck4UMK6vVj02FkmdW9M1Mkyfiu0FNKkp5UX+9d02nl+wnesGRXNlfAenw1E1JCJMSY5h9Z4j7Mg54XQ4fk2TmlJe4vnvtvOv77Zz7aBonromnoAAbfXYmExKjCYoQPSRNA7TpKaUF3hxwXae+24b1yRqQmusIpo34eI+7Zizbi8FxSVOh+O3NKkp5bB/L9zOP7/dxqSBHXn62ngCNaE1WpOTYzh8spDvtuQ4HYrf0qSmlINeWrSDZ77ZxsSBHfnHdQM0oTVyI3tE0rFVU2ZpJ8eO0aSmlENeXryDf3y9lasTOvCMJjSfEBggXJcUzZIdB8k8fMrpcPySJjWlHPDK4p08/dVWrkrowD+vT9CE5kOuS4oB4KM1WQ5H4p80qSnVwF77fidPfZXGhAEd+Keeofmcjq2aMqpHJB+tzqSkVDs5bmia1JRqQDN+2Mnf/5vGlQM68Oz1AwgK1E3QF01JjmH/sXx+2J7rdCh+R7copRrI/mOn+dv8NC6Pb89zmtB82tje7WgbFsJsvWetwelWpVQD2H/sNHsOn+Ly/u15fnKCJjQfFxIUwDWDovkuNZvcEwVOh+NXdMtSysPeWLKbPYdP0SYshH9N0YTmL65PiqG41DBnrTYYaUi6dSnlQW8u2c2fv9xCm2YhdI9qT
rAmNL/RPao5ybGtmZ2SqU/FbkC6hSnlITOX7uaJL7cwvu95dG/XnAC0laO/mZzciV0HT5KSfsTpUPyGJjWlPOCtZek8/sUWLunbjhdvGKgJzU9d1v88wpsEMWuV9jDSUDSpKVXP3l6ezv/M3czFfdrx4tRErXL0Y81CgpiQ0IF5G/dz7HSR0+H4Bd3alKpH7yxP57HPrYT20g2J+uRqxZTkThQUlzJ3/V6nQ/ELusUpVU/eXbGHP32+mYt6R2lCU+X6dWxBn/YtmKVPxW4QHt/qRGS8iGwVkR0i8kgFw0VEXrCH/yQiiS7D0kVko4isF5HVno5Vqbp6b+Ue/vjZJsbGRfHSjZrQ1M9EhCmDY9i87zgbMo86HY7P8+iWJyKBwEvApUAfYKqI9HErdinQw/6bBrziNvxCY0yCMSbJk7EqVVfvr8zgD59uYkxcFC/flEiToECnQ1Je5qqEjrRuFsyDH2/gZEGx0+H4NE8fTg4GdhhjdhljCoFZwFVuZa4C3jaWFUArEWnv4biUqhcfrMrg0U83cmGvSF7RhKYq0bJpMC9OTWRHTh4PffKT3rfmQZ5Oah0B14rkLPuzmpYxwDciskZEplU0AxGZJiKrRWR1bq52HqoazuyUDH4/ZyMX9IrklZsGaUJTVRrRI4KHx8cx76f9zPhhl9Ph+CxPJ7WKbs5xP0SpqsxwY0wiVhXlr0Rk1FkFjZlhjEkyxiRFRkaeW7RK1YAxhplLd/PInI2M7hnJqzcNIjRYE5qq3rRRXbk8vj1PfZXGku0HnQ7HJ3k6qWUBMS7vo4F9NS1jjCn7nwN8ilWdqZRjThUW87sPN/D4F1sYG9eO127WhKZqTkR4+pp4ekSFc98Ha/Xp2B7g6aSWAvQQkS4iEgJMAea6lZkL3GK3ghwKHDPG7BeRMBEJBxCRMGAcsMnD8SpVqd0HTzLp5WV8tn4v/+/inszQhKbqIKxJEK/dPIjiUsPd764hv6jE6ZB8ikeTmjGmGJgOfA2kAh8aYzaLyN0icrddbD6wC9gB/B9wr/15O2CJiGwAVgHzjDFfeTJepSrzzeYDTHhxCdnH83nr9sHcN7YHAfrEalVHsRFhPD8lgS37j/Popxu14Ug9CvL0DIwx87ESl+tnr7q8NsCvKhhvFzDA0/EpVZXiklL++e02Xlm8k/jolrx8YyLRrZs5HZbyAWPi2vGbsT157rttDIhuxa3nxzodkk/weFJTqrE6mFfA/R+sY9nOQ0wd3In/ubKPVjeqenXfmO5s3HuMP3+5hd7tWzC4SxunQ2r0tNsDpSqwLuMIV764hDV7jvD0tfH8fVJ/TWiq3gUECM9OHkCnNs249701HDiW73RIjZ4mNaVcGGN4Z3k617+2nKBA4ZN7zuf6pJhqx1OqrlqEBvPazYM4XVjC3e+uoaBYG46cC01qStlOF5bwuw838KfPNzOiewRfTh9Jv44tnQ5L+YEe7cL55/UDWJ95lMfnbnE6nEZNr6kpBaQfPMnd765ha/YJfndxT6Zf2F1bN6oGNb5fe+69oBsvL97JgOiWTBncyemQGiVNasrvfbslm999uJ7AAGHm7YMZ3VN7plHO+H/jerFp33Ee+3wzvc4LZ2Cn1k6H1Oho9aPyWyWlhqe/SuPOt1cT2zaML6aP0ISmHBUYILwwJYF2LZtwz7tryT1R4HRIjY4mNeWXDuUVcMubK3l58U6mDo7ho7uHEdNG7z9TzmvVLITXbkri6OlCfvX+WopKSp0OqVHRpKb8zrqMI1zx4hJS0o/w9DXx/H1SvDbXV16lT4cWPHVNPKt2H+Zv81OdDqdR0Wtqym8YY3h3ZQZPfLGZdi1CmXPP+dq6UXmtqxI6siHzGG8u3U18dEsmDox2OqRGQZOa8gunC0v4w6cbmbNuLxf0iuRfkxNo1SzE6bCUqtLvL4tj875j/H7ORnq2C6dvBz0Iq45WPyqfl37wJBNfXsqn6/fym4t68OatyZrQVKMQHBjASzcm0rpZC
He9s4YjJwudDsnraVJTPu3bLdlc+e8l7D+Wz39uS+Y3F/XU+89UoxLRvAmv3jSInBMF3D9rHSWl2qN/VTSpKZ9jjGHLvuM88cUW7nx7NZ3bNuPL+0ZwQa8op0NTqk4GxLTiL1f148ftB/nH11udDser6TU15RNOF5awbOdBFqTlsCgth/12x7BTkmN4fEJfbd2oGr3rk2PYkHWUV7+3HoN0Wf/2TofklTSpqUZr79HTLEzLYWFqNst2HqKguJSwkEBG9ojktxdFcUFcJFHhoU6HqVS9+Z8r+5K6/zgPfLSB7lHN6dku3OmQvI4mNdVolJQa1mUcsRJZWg5pB04A0KlNM24Y0okxcVEM7tKGJkF6VqZ8U0hQAK/cNIgrXlzCXe+s4bNfDadl02Cnw/IqmtSUVzt2qojvt+eyKC2HxVtzOHKqiMAAITm2NX+4rDcXxkXRLTIMEW38ofxDuxahvHxjIlNnrOB3s9fzf7ckaeMnF5rUlFcxxrAzN48FqdbZ2Oo9RygpNbRuFsyFvaIY0zuKkT0i9ehU+bXk2DY8dmUfHvt8My8s3M5vLurpdEheQ5OaclxBcQkrdx0ur1bMOHwKgLjzwrl7dFfGxLUjIaYVgXo0qlS5m4d2ZkPmMf713Xb6d2zJ2N7tnA7JK2hSU47IPp7PIjuJLdlxkFOFJTQJCmB49wimjerKhXFRdGzV1OkwlfJaIsJfJ/Zja/ZxfjN7PXOnj6BLRJjTYTlOk5pqEKWlhg1ZR61EtjWHTXuPA9C+ZSgTB3ZkbO8ohnWNoGmINvJQqqZCgwN59aZBTPj3Uu5+Zw3z7h9BUKB/336sSU15zPH8In7cdpCFaTl8vy2Hg3mFBAgM7NSaBy/pxZi4KOLOC9dGHkqdg+jWzfj3DQPJyy/2+4QGmtRUPbIaeZxkYVq21cgj/QjFpYaWTYMZ3TOSMXFRjO4ZSesw7XdRqfp0frcIp0PwGprU1DnJLyph5e7D5dfHyhp59GoXzh0juzK2dxQDY1rpEaRSqkFoUlO1duBYPou22o08th/kdJHVyOP8bm25c1RXLuwVSXRrfYq0UqrhaVJT1SqxG3kstO8d27LfauTRoWUo1wzqyJg4beShlPIOmtR8UHFJKfnFpeQXlXC6sISC4hJOF5aSX2y9zy8qsYYXlrh8VsrpImtYgctnp4pK2LT3GIdPWo08BnVuzUPjrUYevdppIw+llHfRpOYlSksNJwuLOZFf9lfEiQKX12f8P/PzvIJiKyHZSaqopG7PWwoJDKBJcABNgwMJDQ4kNDiA0OBARvaIYExcFKN6aCMPpZR383hSE5HxwPNAIPC6MeZJt+FiD78MOAXcZoxZW5NxnVRSajhVWMzpwhJO2X+ni4p/fl3+uVXmZGHJWckpz05ax/OLyCsoxlSTiwIDhPDQIJo3CSI8NJjw0CA6tAqleZMgmoYEEeqSkJraSalJ+eufPwt1SVo/J7BA7bFDKdXoeTSpiUgg8BJwMZAFpIjIXGPMFpdilwI97L8hwCvAkBqOWy/yi0p47ftdnCr6OUmdthOSlazO/qyguLRW8wgOlPJEFB4aRHiTYDq1aXbmZ6FBLu+t/y1cXjcNDtTqPqWUqoKnz9QGAzuMMbsARGQWcBXgmpiuAt42xhhghYi0EpH2QGwNxq0XxsBz322jSVAAzUICaRYSRNMQ68ymaUggbcJCiG4dSNPgIHu49bn1P4hmwa6fBZ0xvFmwNa2QIG3SrpRSnubppNYRyHR5n4V1NlZdmY41HBcRmQZMA+jUqVOdggwNDmDn3y7T6jflOef1dzoCj4trE0dRt2M0OS/O6VCUH/N0UqsoS7hfOaqsTE3GxRgzA5gBkJSUVKcWEiJCoOYz5UmXes3lYI95ePDDVt2MUg7ydFLLAmJc3kcD+2pYJqQG4yqllFLlPH2hJwXoISJdRCQEmALMdSszF7hFLEOBY8aY/TUcVymllCrn0TM1Y0yxiEwHvsZqlv+mMWaziNxtD
38VmI/VnH8HVpP+26sa15PxKqWUatzEVHdzVCOSlJRkVq9e7XQYSinVqIjIGmNMktNx1AdtZ66UUspnaFJTSinlMzSpKaWU8hma1JRSSvkMn2ooIiK5wB4PziICOOjB6dcHjbF+aIz1Q2OsH56OsbMxJtKD028wPpXUPE1EVnt7CyGNsX5ojPVDY6wfiFLwRQAABBNJREFUjSFGb6HVj0oppXyGJjWllFI+Q5Na7cxwOoAa0Bjrh8ZYPzTG+tEYYvQKek1NKaWUz9AzNaWUUj5Dk5pSSimfoUmtBkQkRkQWiUiqiGwWkV87HZM7EQkVkVUissGO8X+djqkyIhIoIutE5EunY6mIiKSLyEYRWS8iXtlDtoi0EpGPRSTNXi+HOR2TKxHpZS+/sr/jIvIbp+NyJyK/tbeXTSLygYiEOh2TOxH5tR3fZm9cht5Gr6nVgIi0B9obY9aKSDiwBrjaGLPF4dDKiYgAYcaYPBEJBpYAvzbGrHA4tLOIyO+AJKCFMeYKp+NxJyLpQJIxxmtvyBWRt4AfjTGv288bbGaMOep0XBURkUBgLzDEGOPJzhFqRUQ6Ym0nfYwxp0XkQ2C+MWams5H9TET6AbOwnileCHwF3GOM2e5oYF5Mz9RqwBiz3xiz1n59AkgFOjob1ZmMJc9+G2z/ed0Ri4hEA5cDrzsdS2MlIi2AUcAbAMaYQm9NaLaxwE5vSmgugoCmIhIENAP2ORyPu97ACmPMKWNMMfA9MNHhmLyaJrVaEpFYYCCw0tlIzmZX660HcoBvjTFeFyPwL+AhoNTpQKpggG9EZI2ITHM6mAp0BXKB/9jVuK+LSJjTQVVhCvCB00G4M8bsBZ4BMoD9wDFjzDfORnWWTcAoEWkrIs2wHqgc43BMXk2TWi2ISHPgE+A3xpjjTsfjzhhTYoxJAKKBwXbVhdcQkSuAHGPMGqdjqcZwY0wicCnwKxEZ5XRAboKAROAVY8xA4CTwiLMhVcyuGp0AfOR0LO5EpDVwFdAF6ACEichNzkZ1JmNMKvAU8C1W1eMGoNjRoLycJrUasq9TfQK8Z4yZ43Q8VbGrohYD4x0Oxd1wYIJ9zWoWMEZE3nU2pLMZY/bZ/3OAT7GuZ3iTLCDL5Uz8Y6wk540uBdYaY7KdDqQCFwG7jTG5xpgiYA5wvsMxncUY84YxJtEYMwo4DOj1tCpoUqsBuxHGG0CqMeZZp+OpiIhEikgr+3VTrA02zdmozmSM+b0xJtoYE4tVJbXQGONVR8YiEmY3BsKu0huHVQXkNYwxB4BMEellfzQW8JpGS26m4oVVj7YMYKiINLO38bFY18u9iohE2f87AZPw3uXpFYKcDqCRGA7cDGy0r1kBPGqMme9gTO7a8//buUMbBIIgCsP/EDpAUBGFoBCEohAgyBEsLWCgA1qgAMQg7hzndzP5vwqe2pfMTgaO06bZArhkZpcr851bA7fxjWMJnDPz3jbSrANwmsZ7b2DbOM+f6Q9oA+xaZ5mTmY+IGIAn40jvRZ/nqK4RsQK+wD4zP60D9cyVfklSGY4fJUllWGqSpDIsNUlSGZaaJKkMS02SVIalJkkqw1KTJJXxA23ohqPs8T8KAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dd3xUZfb48c9JQiohCaFDQmgSAiEJhM6CZUFcFQELawd1UbFtsfB1Xdd1m7r+1rKrsqwounYR1t6BVQgIhN5JqKEnIQnp7fn9cSdxCCEZIMmdct6v17wyc+uZycyZO+c+93nEGINSSinv5Wd3AEoppZqXJnqllPJymuiVUsrLaaJXSikvp4leKaW8nCZ6pZTycprovYiIzBaR39kdhx1E5HoR+cqF5T4XkZubcL+3i8izTbW95iAicSJiRKRQRGbYHc+ZcsTe+wzXOc/xfKtE5DbHtHtF5InmidLNGWP01sw3YA9QAhQ63f55FtuZBix1etwGWAZ8ALRqhrhHA2lAPpDr2NeQZtjP+UBWI8vMA8odr90JIB0Ya/P/NRDYD3R1PD4P+BA45ni9vgT61lnnV8Bhx2v6ChDkNO9uYDVQBsyrZ3/XAFsdz38LMMnFOOMAAwTUec2XOD0W4F5gE1AEZAHvA4l2vsaO2AzQ24XlHgMeqzNtCXCb436w43l1sPs5tfRNj+hbzuXGmNZOt7vPZWMiEgV8A+wFphpjKhpZPuAMt98G+AT4B9AW6Ar8ASsJ2eUpY0xrIAJ4CVggIv42xnMFsM0Yc8DxOBL4COgLdARWYiV+AETkYmAWcBFW8u2J9ZrWOAj8CesL4CQi0hV4A/g11hf8A8BbItKhiZ7Lc8B9WMm+LdaX1n+BS5to+7YzxpQCnwM32R1LS9NEbyMRCRKRPBEZ4DStvYiUNPQBFpF2wCJgM3CDMabSMX2eiPzJcf98EckSkYdE5DDwqohEicgnInJMRI477nc7zW7OAzDGvG2MqTLGlBhjvjLGbHCK4xYR2erY1pci0t1pnhGRmSKyU0ROiMgfRaSXiCwXkQIReU9EAkUkDOvD18XxU7tQRLo09LoZY6qBt7ASUkfH/qaJyFLH/QedtlUoIhUiMs8xb4nTT/lpIrJMRJ5x/B92ichIx/T9InK0kTLPJcD/nOJaaYyZa4zJdXzxPgP0FZFoxyI3A3ONMZuNMceBP2L9SqtZf4Ex5r9ATj376gbkGWM+N5ZPsY68ezX0WrlCRPoAdwHXGmMWGWPKjDHFxpg3jTFPOJaJEJHXHe+dvSLyiIj4Oead0evoeN8/LSL7ROSIo+QY4jT/ARE5JCIHReQWp+lDHMsHOE27UkTWncHTXYIXfXm5ShO9jYwxZcAC4FqnydcA/zPGHD3Nam2xkssPwC2OpHc6nRzLdwdmYP2/X3U8jsUqJ/3zNOvuAKpE5DURucTxC6KWiEwCHgamAO2B74G362xjAjAYGA48CMwBrgdigAFYiaUIK2EedPq1c7CB54TjKP4mYDdwpO58Y8xTNdsC+mGVUt47zeaGARuAaKwvj3eAIUBv4AbgnyLS+jTrJgLbGwh1DHDYGFOTuPsD653mrwc6On0RNGQ1sFVEJoqIv+P1L3PEfsaMMUuMMec7Hl6EVTpb2cAq/8D6JdUTGIv1+k93mn8mr+OTWAcSyY75XYFHAURkAnA/MA7oA/zUKeZVWF+C45z2ewPwH8f8x4wxjzXy1LcCSY0s433srh35wg2rRl8I5DndfuGY91Ngl9Oyy4CbTrOdaVj12QpgWD3z5wF/ctw/H6umHdxAXMnA8Qbm93NsMwuoxCpLdHTM+xy41WlZP6AY6O54bIBRTvPTg
YecHv8/4FmnWF2p0Zc6XrtSx+36Oq/N0jrrhNSz3yX8WLOdBux0mpfoiLuj07QcIPk0Me0EJpxmXjfgANaXWc20TOflgVaO/cXVWfdP1F+jv9XxPqp0vNaXuvj+i6NOjb7O/N8CKxpY3x/rSyXBadrtOGr8Z/I6Yp0LKAJ6Oc0bAex23H8FeMJp3nk41eiBh4A3HffbOl6Hzg3EXvv/djzuA1S5+tn1lpse0becScaYSKfbvx3TFwEhIjLMUfpIBhY2sJ31WEc8n4tISiP7PGasuiQAIhIqIv9y/PQuAL4DIk9X5zbGbDXGTDPGdMM6Au8C1LQw6Q485/ipnod18lGwjs5qOB9tl9Tz+HRHyqfztDEmEiuBpwJ/E5FLGlh+LrDdGPNkA8vUjQljjKtxHgfC604UkfbAV8CLxhjnXzmFWPX1GjX3TzQQX802fwo8hfWlGIh1VP2yiCQ3tq4LcoDODcxv59jnXqdpe2n4f32617E9EAqkO713vnBMB+s9tr/Ofpy9AVzu+HVwDfC9MeZQA7HXFY51ItynaKK3mbFKL+9hlW+uAz4xxjT4wTfGPAc8AXztXN+vb9E6j3+DdaJwmDGmDVZpAawE3Vic27COqmv2tx+4vc6XV4gxJq2xbbkQZ2OxGGPMJqxfP/XWW0VkFtZzvfUs4nHVBhznMpz2G4WV5D8yxvy5zvKbOblskAQcMT+WdhqSDHxnjFltjKk2VhnjB5xKG+fgW6CbiKSeZn421q/I7k7TYrF+sZypbKyk39/pfRNhrDIbwCGs0p7zfmoZ68T3cmAycCOOss0Z6MfJ5TOfoInePbwFTMWqX7/lygrGmKewWkp8IyJ9XdxPONaHLE9E2gK/P92CIhIvIr+pOVkrIjFYX0YrHIvMBv5PRPo75keIyNUuxlHXESBaRCJcXUFE4rGaf26uZ94lWK1HJhljSs4yJld8hnVkXbPfNlhNKpcZY2bVs/zrwK0ikuD4QngE68uzZv0AEQnGKpX4i0iw04nHVcBPao7gHb/mfoKjRu848bnnbJ6EMWYn8CLwtlgn8QMd+/65iMwyxlRhHYz8WUTCHb88f411dH2m+6oG/g08I44GByLSVawWSTj2M83xGoVS/3v0daxzPok0/Ou3PmOxyo4+RRN9y/m4TkuQ2jeoMeYHrLplF87gTWiM+SPwMvCtiLjS+uJZrLJHNlbC/qKBZU9gnWD7QUSKHMtvwvpVgDFmIdZJtXccZaBNWCdVz5jj18LbwC7Hz/nTtbqpaU1ThHXU/Crwr3qWm4pVCtjq9HrPPpvYGvExEO8U72SsE5DT6/yvYwGMMV9glV8WY5Uk9nJyInsE64t4FtZJxhLHNIwx/8NqJz5fRE5gXTvxF2NMzUViMVi/cM7WvVgn5l/AOg+S6Xg+Hzvm34P1Ht0FLMU6IDmlGaiLHgIygBWO9843WL++MMZ8jvU+XeRYZlE96y/E+nWx0Fgn813i+BL9GfDaWcbtscRxgkIpdRbEutI0wRjzS5vj+Aq4zxiztZ553bFaB5UCDzidH/JYIpKJVTr85jTz+2D9CgoEZhpj5onIPUCMMebBFgzVLWiiV0p5FBG5EkcTTdNw82LlcEZXSyqllJ1EZAmQANyoSd51ekSvlFJeTk/GKqWUl3PL0k27du1MXFyc3WEopZTHSE9PzzbGtK9vnlsm+ri4OFavXm13GEop5TFEpO5VxLW0dKOUUl5OE71SSnk5TfRKKeXl3LJGX5+KigqysrIoLS1tfGHllYKDg+nWrRutWrWyOxSlPIrHJPqsrCzCw8OJi4tDpNHOFpWXMcaQk5NDVlYWPXr0sDscpTyKx5RuSktLiY6O1iTvo0SE6Oho/UWn1FnwmEQPaJL3cfr/V+rseFSiV6pZfT7Lunm4J1c+yZMrGxpUq2H5JRV8f9/DHPpT3XFTlKfSRO+ivLw8XnzxRbvDYM+ePbz11o9jk6xevZp7773Xxoi8yOGN1s3DbcvdxrbcbWe1b
sbRQia9sIyDq9dzaI3nvxbKooneRQ0l+qqqqibdV2Vl5Wnn1U30qampPP/88026f+WbFm07wuQXllFQUoGIUFh2+veh8iwuJXoRmSAi20UkwzEWZ93514vIBsctTUSSnObtEZGNIrJORDy2X4NZs2aRmZlJcnIyDzzwAEuWLOGCCy7guuuuIzExkT179jBgwI/Dtz799NM89thjAGRmZjJhwgQGDx7MT37yE7ZtO/Vo67HHHmPGjBmMHz+em266iT179vCTn/yEQYMGMWjQINLS0mrj+P7770lOTuaZZ55hyZIlXHbZZbXbuOWWWzj//PPp2bPnSV8Af/zjH4mPj2fcuHFce+21PP300834ailPYozhxSUZ3PraamKjQ/nontG0DgqgsFQTvbdotHmliPhjDS82DsgCVonIR8aYLU6L7QbGGmOOO8brnIM1DF2NC4wx2U0W9eezmv4ndqdEuOSJ085+4okn2LRpE+vWrQNgyZIlrFy5kk2bNtGjRw/27Nlz2nVnzJjB7Nmz6dOnDz/88AMzZ85k0aJTR0hLT09n6dKlhISEUFxczNdff01wcDA7d+7k2muvZfXq1TzxxBM8/fTTfPLJJ7VxONu2bRuLFy/mxIkT9O3blzvvvJP169fzwQcfsHbtWiorKxk0aBCDBw8+89dIeZ2S8ioe/GADH68/yGUDO/O3q5IICfQnMyiAIwWllFdWExigP/w9nSvt6IcCGcaYXQAi8g5wBVCb6I0xaU7LrwC6NWWQ7mro0KGNtukuLCwkLS2Nq6/+cdzssrKyepedOHEiISEhgHWB2N133826devw9/dnx44dLsV06aWXEhQURFBQEB06dODIkSMsXbqUK664onbbl19+uUvbUt7tQF4JM15fzZZDBTw4oS93ju1V27KpdXAAh/IN2w4XMLBbpM2RqnPlSqLvCux3epzFyUfrdd3KyQNcG+ArETHAv4wxc+pbyTH25gyA2NjYhiNq4Mi7JYWFhdXeDwgIoLr6xwFvatp7V1dXExkZWftLwNXtPfPMM3Ts2JH169dTXV1NcHCwSzEFBQXV3vf396eyshIdXEbVtXJ3Lne+kU55ZTVzb07lwviOJ81vHWSlhrX78jTRewFXfpPV13i53swhIhdgJfqHnCaPMsYMAi4B7hKRMfWta4yZY4xJNcaktm9fb5fKtgoPD+fEiROnnd+xY0eOHj1KTk4OZWVltaWVNm3a0KNHD95//33AqoeuX7++0f3l5+fTuXNn/Pz8+M9//lN7wrexOOozevRoPv74Y0pLSyksLOTTTz89o/WVd3nrh31c//IKIkJasfCuUackeYDAAD8CA/xYu++4DRGqpuZKos8CYpwedwMO1l1IRAYCLwNXGGNyaqYbYw46/h4FFmKVgjxOdHQ0o0aNYsCAATzwwAOnzG/VqhWPPvoow4YN47LLLiM+Pr523ptvvsncuXNJSkqif//+fPjhh43ub+bMmbz22msMHz6cHTt21B7tDxw4kICAAJKSknjmmWdcin3IkCFMnDiRpKQkpkyZQmpqKhERES4+c+UtyiureeS/G3l44UZG9mrHwrtG0btD63qXFayj+rX781o2SNU8jDEN3rDKO7uAHkAgsB7oX2eZWCADGFlnehgQ7nQ/DZjQ2D4HDx5s6tqyZcsp05TrTpw4YYwxpqioyAwePNikp6fbHNHZadb3wSs/s24ebtrn08y0z6edNC37RKm5enaa6f7QJ+Yvn24xlVXVDW5jzw03mrTLrzLdH/rEZJ8obc5wVRMBVpvT5NRGa/TGmEoRuRv4EvAHXjHGbBaROxzzZwOPAtHAi46TOZXGmFSgI7DQMS0AeMsY88W5fz2pMzVjxgy2bNlCaWkpN998M4MGDbI7JNVCNh/MZ8br6WQXlvHs1GQmpXR1ab3WQVYvoev253FRv1PLO8pzuNR7pTHmM+CzOtNmO92/DbitnvV2AUl1p6uW53yRlfIdn2w4yP3vrycqNJD37
xhxRidWw4L88fcT1u7TRO/pPKabYqXUmXn6y+38c3EGg7tH8dINg+gQ7lrLrRr+IvTrHM7a/XpC1tNpolfKy1RVGzKOFbJ4awZTU2N4fFJ/ggL8z2pbKTFRLFx7gKpqg7+f9h7qqfSSN6W8yO7sIjYdzCevuILHr+jPE1cmnnWSB0iJjaSwrJKMo4VNGKVqaZrolfIS3+04xhX/XEpFlaFfp3BuGnHuo7GlxEYBaHt6D6eJXrnEufO0jz76iCeeOP3VyXV7+jx48CBXXXVVs8foq4wxvPz9Lqa9upIukSEkdo2gTUjTjKsbFx1KZGgr1u7T9vSeTBO9zZq6i+OW2P/EiROZNev0A3TUTfRdunRh/vz5ZxWfalhpRRW/eW89f/p0Kxf378QHd44kqAk7IRMRUmIi9YSsh9NEfwYmTZrE4MGD6d+/P3PmWF32vPTSSzz44IO1y8ybN4977rkHgDfeeIOhQ4eSnJzM7bffXptUW7duXXsV7fLly3n88ccZMmQIAwYMYMaMGbV906xatYqBAwcyYsQIHnjggdpukKuqqnjggQcYMmQIAwcO5F//+tcpse7Zs4f4+HhuvvlmBg4cyFVXXUVxcTEAcXFxPP7444wePZr333+fr776ihEjRjBo0CCuvvpqCguteuwXX3xBfHw8o0ePZsGCBSc9x7vvvhuAI0eOMHnyZJKSkkhKSiItLe2ULp2du3AuLS1l+vTpJCYmkpKSwuLFi2u3OWXKFCZMmECfPn1Oek1V/Q7nlzL1X8tZsPYAvx53Hi9cN4iwoKZvX5ESG8XOo4UUlFY0+bZVy/DIVjdPrnzyrEfQOZ34tvE8NPShBpd55ZVXaNu2LSUlJQwZMoQrr7ySq666ihEjRvDUU08B8O677/Lb3/6WrVu38u6777Js2TJatWrFzJkzefPNN7npppsoKipiwIABPP744wAkJCTw6KOPAnDjjTfyySefcPnllzN9+nTmzJnDyJEjTzqCnjt3LhEREaxatYqysjJGjRrF+PHjT+lJc/v27cydO5dRo0Zxyy238OKLL3L//fcDEBwczNKlS8nOzmbKlCl88803hIWF8eSTT/L3v/+dBx98kF/84hcsWrSI3r17M3Xq1Hpfk3vvvZexY8eycOFCqqqqKCwsPKVLZ+cunF944QUANm7cyLZt2xg/fnxtz5zr1q1j7dq1BAUF0bdvX+655x5iYmJO2aeCTQfymT5vFcVllcy5cTDj+3dqtn2lxEZiDGzYn8/oPu2abT+q+egR/Rl4/vnnSUpKYvjw4ezfv5+dO3fSvn17evbsyYoVK8jJyWH79u2MGjWKb7/9lvT0dIYMGUJycjLffvstu3btAqxeJa+88sra7S5evJhhw4aRmJjIokWL2Lx5M3l5eZw4cYKRI0cCcN1119Uu/9VXX/H666+TnJzMsGHDyMnJYefOnafEGxMTw6hRowC44YYbWLp0ae28msS9YsUKtmzZwqhRo0hOTua1115j7969bNu2jR49etCnTx9EhBtuuKHe12TRokXceeedtc+rsT50li5dyo033ghAfHw83bt3r030F110EREREQQHB5OQkMDevXsb3JYv++vnWxFgwcxRzZrkAZJiIhHRE7KezCOP6Bs78m4OS5Ys4ZtvvmH58uWEhoZy/vnn13ZFPHXqVN577z3i4+OZPHkyIoIxhptvvpm//vWvp2wrODgYf3+ryVtpaSkzZ85k9erVxMTE8Nhjj1FaWtpg18LGGP7xj39w8cUXNxhz3RYXzo9rOkkzxjBu3Djefvvtk5Zdt27dObfYqE9Dz6u+LpbVqUorqli15zg3Du9O307hzb6/NsGt6N2+tXZw5sH0iN5F+fn5REVFERoayrZt21ixYkXtvClTpvDf//6Xt99+u/ZI+aKLLmL+/PkcPXoUgNzc3HqPUGu+LNq1a0dhYWHtScuoqCjCw8Nr9/POO+/UrnPxxRfz0ksvUVFh1Ux37NhBUVHRKdvet28fy
5cvB+Dtt99m9OjRpywzfPhwli1bRkZGBgDFxcXs2LGD+Ph4du/eTWZmZu369bnooot46aWXAOvcQUFBQYNdKY8ZM4Y333yzNu59+/bRt2/fepdV9Vuz9zjlldWM7BXdYvtMiY1k7b7jOraBh9JE76IJEyZQWVnJwIED+d3vfsfw4cNr50VFRdWWGoYOtXphTkhI4E9/+hPjx49n4MCBjBs3jkOHDp2y3cjISH7xi1+QmJjIpEmTGDJkSO28uXPnMmPGDEaMGIExprYsctttt5GQkMCgQYMYMGAAt99+e71Hv/369eO1115j4MCB5Obm1pZYnLVv35558+Zx7bXXMnDgQIYPH862bdsIDg5mzpw5XHrppYwePZru3bvX+7o899xzLF68mMTERAYPHszmzZsb7NJ55syZVFVVkZiYyNSpU5k3b95JR/KqcWmZOfj7CUN7tG2xfabERnG8uIK9OcUttk/VdMQdv6FTU1PN6tUnjyO+detW+vXrZ1NE9igsLKR1a6u/8CeeeIJDhw7x3HPPubTunj17uOyyy9i0aVNzhtjimvV98Oql1t/p7j0wy5QXl2GAhTNH1Tt/+hfTAXh1wqtnvY+9N94EQPf/vA7AtsMFTHj2e56ZmsTkFJ8YKdTjiEi6o9fgU+gRvRv79NNPSU5OZsCAAXz//fc88sgjdoekbHaitIL1WfmM6tWyrV/6dAgnLNBfL5zyUB55MtZXTJ069bTNGhsTFxfndUfzClbtyaWq2rRofR7A309IionURO+h9IheKQ+yLCOHwAA/BnWPavF9p8RGsvVQASXl9l7Nrc6cJnqlPEhaZg6p3aMIbnX2PVKerUGxUVRWGzYdzG/xfatzo4leKQ+RW1TO1kMFLV62qZEcY41OpRdOeR5N9Ep5iOWZOQCM7G1PNwTRrYPoHh2qdXoPpIneJt7Y7e+0adO0l8pmlJaZTeugAAZ2bbibieaUoidkPZIm+iam3f6q5pKWmcOwHm0J8LfvY5sSG8XhglIO5ZfYFoM6c5roXeSt3f7GxcXx8MMPM2LECFJTU1mzZg0XX3wxvXr1Yvbs2YDVP01NN8mJiYm8++67tdPvvvtuEhISuPTSS2u7ewBIT09n7NixDB48mIsvvrjeq4KV6w7mlbA7u4gRNtXna6TE1tTp9ajek3hkO/rDf/kLZVubtpvioH7xdHr44QaX8dZuf2NiYli+fDm/+tWvmDZtGsuWLaO0tJT+/ftzxx13sGDBAtatW8f69evJzs5myJAhjBkzhuXLl7N9+3Y2btzIkSNHSEhI4JZbbqGiooJ77rmHDz/8kPbt29d23fzKK6+c8f9FWdJq6vMtfKFUXfGd2hAU4Mfafcf5WWJnW2NRrvPIRG+Xut3+Pv/887WJvr5ufwHKy8sZMWLESd3+1qxfM3iJs0WLFvH669Zl5zXd/h4/fvpWDkuXLq0d6OR03f4CtX3x1JfoJ06cCEBiYiKFhYWEh4cTHh5OcHAweXl5LF26lGuvvRZ/f386duzI2LFjWbVqFd99913t9C5dunDhhRcC1hfipk2bGDduHGCVszp31qRwLtIys2kbFkh8C/RW2ZDAAD8Su0boEb2H8chE39iRd3Px1m5/a5bz8/M7aR0/Pz8qKysb3Ed9z8kYQ//+/Wt7zlTnxhjD8swcRvSMxs+v6d9DZyolNpLXl++lvLKawCYctlA1H/0vnQFf7fZ3zJgxvPvuu1RVVXHs2DG+++47hg4dypgxY3jnnXeoqqri0KFDtecH+vbty7Fjx2pfq4qKCjZv3tykMfmS3dlFHMovZWRve+vzNVJioyirrGbb4QK7Q1Eu0kR/Bny129/JkyczcOBAkpKSuPDCC3nqqafo1KkTkydPpk+fPiQmJnLnnXcyduxYAAIDA5k/fz4PPfQQSUlJJCcnk5aW1qQx+RJ3qc/X0BOynke7KXaRt3b762l8sZvimW+ms25fHstmXehS+a85uimua/hfvmV4z7Y8+/OUs
96HalraTbFSHqq62lGf79WuWc7xnK2U2EgdWtCDuJToRWSCiGwXkQwROeXKHhG5XkQ2OG5pIpLk6rqeQrv9VXbYeriA48UVtvVvczopsZHszSkmp7DM7lCUCxpN9CLiD7wAXAIkANeKSEKdxXYDY40xA4E/AnPOYF2XuWOZSbUcX/z//9i/jbslequb5HV6VO8RXDmiHwpkGGN2GWPKgXeAK5wXMMakGWNqGnuvALq5uq6rgoODycnJ8ckPu7KSfE5ODsHBwXaH0qLSMnPo2S6MzhEhdodykgFdIgjwEz0h6yFcaUffFdjv9DgLGNbA8rcCn5/luqfVrVs3srKyOHbs2NmsrrxAcHAw3br5znilFVXV/LArh8mDutodyilCAv3p17kNa/drl8WewJVEX98ZoHoPq0XkAqxEX9PA/EzWnQHMAIiNjT1lfqtWrejRo4cL4SrlHTZk5VNUXuU2zSrrSomNZMGaA1RVG/zd4EIudXqulG6yAOfr5rsBB+suJCIDgZeBK4wxOWeyLoAxZo4xJtUYk9q+fXtXYlfKq6VlZAMwvKd71edrpMRGUlhWScbRQrtDUY1wJdGvAvqISA8RCQR+DnzkvICIxAILgBuNMTvOZF2lVP3SMnNI6NyGtmGBdodSr5QY64Ssjjjl/hpN9MaYSuBu4EtgK/CeMWaziNwhInc4FnsUiAZeFJF1IrK6oXWb4Xko5VVKK6pI33fc7ZpVOuseHUpUaCs9IesBXOrUzBjzGfBZnWmzne7fBtzm6rpKqYal7z1OeWU1o2waNtAVIkJKbJSekPUAemWsUm4oLTObAD9hSI+2dofSoJSYSHYeLaSgtMLuUFQDNNEr5YaWZeSQFBNJ6yD37kk8JTYKY2DD/ny7Q1EN0ESvlJspKK1gQ1aeW9fnawyMiUBET8i6O030SrmZlbtyqTbYPj6sK9oEt6JPh9as0UTv1jTRK+Vm0jJzCArwY5CjPxl3lxITxdr9edo9iRvTRK+Um0nLzCY1LorgVv52h+KSlNhI8oor2JNTbHco6jQ00SvlRrILy9h2+ITbdntQn5qeLLVO77400SvlRlbsqhk20P3r8zV6d2hN66AAvXDKjWmiV8qNLMvIITwogMSuEXaH4jJ/PyEpJkIvnHJjmuiVciPLM7MZ1rMtAf6e9dFMiYli66ETlJRX2R2KqodnvZuU8mIH8krYk1PMCA+qz9dIiY2kqtqw8YBeOOWONNEr5SZquiUe5WbDBrZiYMEAAB6aSURBVLoiOSYS0BOy7koTvVJuYnlmDtFhgZzXIdzuUM5YdOsgukeH6glZN6WJXik3YIxhWWY2I3pF4+ehozWlxESyZt9xvXDKDWmiV8oN7Mou4khBmUe1n68rJTaKoyfKOJRfancoqg5N9Eq5gZr6vCe1n68rJbamTq/lG3ejiV4pN5CWmUPXyBC6R4faHcpZi+/UhqAAPz0h64Y00Stls+pqw/JdOYzoFY2IZ9bnAQID/EjsGsHa/XpE72400Stlsy2HCsgrrvDIZpV1pcRGsvFAPuWV1XaHopxoolfKZssza/q38dwTsTVSYqMor6xm66ECu0NRTjTRK2WzZZnZ9GofRsc2wXaHcs5+PCGrdXp3ooleKRtVVFWzcneuVxzNA3SOCKFTm2Ct07sZTfRK2Wj9/jyKy6s8ulllXSmxkdrE0s1oolfKRmmZOYjA8J7elej35RaTXVhmdyjKQRO9UjZKy8wmoXMbosIC7Q6lydSMOLVOj+rdhiZ6pWxSUl7Fmr15jOrtHfX5GgO6RBDgJzoQiRvRRK+UTdL3Hqe8qpoRXlSfBwgJ9Kdf5zZap3cjmuiVssmyzGwC/IShcW3tDqXJpcRGsn5/HlXV2pOlO9BEr5RN0jJzSI6JJCwowO5QmlxKbCRF5VXsPHrC7lAUmuiVskV+SQUbs/K8qlmls5QY64Sslm/cg0uJXkQmiMh2EckQkVn1zI8XkeUiUiYi99eZt0dENorIOhFZ3
VSBK+XJVu7OpdrASC87EVuje3QoUaGt9ApZN9Hob0YR8QdeAMYBWcAqEfnIGLPFabFc4F5g0mk2c4ExJvtcg1XKW6RlZhPcyq+2ywBvIyKkxEbpEb2bcOWIfiiQYYzZZYwpB94BrnBewBhz1BizCqhohhiV8jppGTkMiWtLUIC/3aE0m5SYSHYeLSS/RNOC3VxJ9F2B/U6PsxzTXGWAr0QkXURmnG4hEZkhIqtFZPWxY8fOYPNKeZZjJ8rYfuSE1zWrrKvmwqkNWXpUbzdXEn19IyGcSZupUcaYQcAlwF0iMqa+hYwxc4wxqcaY1Pbt25/B5pXyLMt3Wd0Sj/KSjsxOZ2BMBCJ6QtYduJLos4AYp8fdgIOu7sAYc9Dx9yiwEKsUpJTPWp6ZTXhwAP27tLE7lGbVJrgVfTq01hOybsCVRL8K6CMiPUQkEPg58JErGxeRMBEJr7kPjAc2nW2wSnmDZRk5DOsRTYC/97duTomJYu3+PIzRC6fs1Og7zRhTCdwNfAlsBd4zxmwWkTtE5A4AEekkIlnAr4FHRCRLRNoAHYGlIrIeWAl8aoz5ormejFLubn9uMftyi71i2EBXpMRGkldcwZ6cYrtD8WkuXZJnjPkM+KzOtNlO9w9jlXTqKgCSziVApbxJTX3eWwYaaUzNCdm1+47To12YzdH4Lu//7aiUG0nLyKZd60DO69ja7lBaRO8OrWkdFKAnZG2miV6pFmKMIS0zhxG92iFSX2M27+PvJyTFRGiXxTbTRK9UC8k8VsjRE2Ve27/N6aTERLH10AlKyqvsDsVnaaJXqoWkZfpG+/m6UmIjqao2bDyQb3coPksTvVItJC0jh66RIcS0DbE7lBaVHGP156Pt6e2jiV6pFlBVbVi+K4dRvaN9pj5fI7p1EN2jQ/WErI000SvVArYeKiC/pMJnmlXWlRITyZp9x/XCKZtooleqBSzLsHrp9rUTsTVSYqM4eqKMQ/mldofikzTRK9UC0jJz6N2hNR3aBNsdii1q+t3X8o09NNEr1czKK6tZuTvXZ4/mAeI7tSEowE9PyNpEE71SzWx9Vh4lFVU+W58HCAzwI7FrBGv36xG9HTTRK9XM0jJyEIHhPdvaHYqtUmIj2Xggn/LKartD8Tma6JVqZssysxnQJYLI0EC7Q7FVSmwU5ZXVbD1UYHcoPkcTvVLNqKS8irX7jvt0fb7GjydktU7f0jTRK9WMVu3JpaLKMLK379bna3SOCKFTm2Ct09tAE71SzSgtM4cAP2FIXJTdobiFlNhIbWJpA030SjWjtMxsUmIjCQ10aYwfr5cSG8m+3GKyC8vsDsWnaKJXqpnkF1ew6UC+TzerrKtmxKl1elTfojTRK9VMftidQ7Xx3W4P6jOgSwQBfsIaPSHbojTRK9VM0jJzCG7lV3sUqyAk0J/UuCjeXbWfHC3ftBhN9Eo1k7TMbIbEtSUwQD9mzh6b2J+C0goe/Wiz3aH4DH0HKtUMVu/JZceRQkZrs8pTxHdqwy9/eh6fbjjEJxsO2h2OT9BEr1QTO1FawS/fXUdM2xCuGxZrdzhu6fYxPRnYLYLf/XcTx05oCae5aaJXqon9/qPNHMwr4dmpyYQHt7I7HLcU4O/H/7s6iaLyKh7570YdkKSZaaJXqgl9suEgC9Yc4O4L+zC4u293YtaYPh3D+c248/hy8xE+Wq8lnOakiV6pJnIov4TfLtxEUkwk91zY2+5wPMJtP+lJSmwkj364maMFOvpUc9FEr1QTqK42/Oa99VRUVfPc1GRa+etHyxX+fsLTVydRWlHFwwu1hNNc9N2oVBN4eeku0jJz+P3lCcS1C7M7HI/Sq31rHri4L99sPcqCNQfsDscraaJX6hxtOVjA377czviEjlyTGmN3OB5p+qgeDImL4rGPN3NYBxBvcprolToHpRVV3PfOWqJCA3niyoGIiN0heSR/P+FvVyVRUVXNrAUbtITTxFxK9CIyQUS2i0iGiMyqZ368iCwXkTIRu
f9M1lXKkz3x+TZ2Hi3kb1cn0TbMt0eQOldx7cKYNSGeJduP8f7qLLvD8SqNJnoR8QdeAC4BEoBrRSShzmK5wL3A02exrlIeacn2o8xL28O0kXGMPa+93eF4hZtGxDG8Z1v++MkWDuSV2B2O13DliH4okGGM2WWMKQfeAa5wXsAYc9QYswqoONN1lfJEOYVlPDB/A+d1bM2sS+LtDsdr+DlKOFXGMOsDLeE0FVcSfVdgv9PjLMc0V7i8rojMEJHVIrL62LFjLm5eqZZnjGHWgo3kF1fw7NQUglv52x2SV4lpG8rDP+vH9zuzeXvl/sZXUI1yJdHXd3bJ1a9Zl9c1xswxxqQaY1Lbt9efwcp9vbNqP19vOcIDF/cloUsbu8PxStcPi2V073b8+dMt7M8ttjscj+dKos8CnNuMdQNcvV75XNZVyu3szi7i8Y+3MKp3NLeO7mF3OF5LRHjiykREhAfnb6C6Wks458KVRL8K6CMiPUQkEPg58JGL2z+XdZVyKxVV1fzynbUEBvjx9NVJ+PlpU8rm1C0qlEcu7cfyXTm88cNeu8PxaI0memNMJXA38CWwFXjPGLNZRO4QkTsARKSTiGQBvwYeEZEsEWlzunWb68ko1Zye/3Yn67Py+cvkRDpHhNgdjk+YOiSGsee156+fbWNvTpHd4Xgsl9rRG2M+M8acZ4zpZYz5s2PabGPMbMf9w8aYbsaYNsaYSMf9gtOtq5SnWb0nlxcWZ3DloG5cOrCz3eH4jJoSToC/8MD7WsI5W3plrFKNqBlIpGtUCI9N1MtAWlrniBB+f3l/Vu7JZV7aHrvD8Uia6JVqhA4kYr8rB3XlovgOPPXlNnYdK7Q7HI+jiV6pBtQOJHJBbx1IxEYiwl+mJBIU4M/976+nSks4Z0QTvVKncdJAIhf1sTscn9exTTB/mNifNfvymLt0l93heBRN9ErVw3kgkWd1IBG3cUVyF8YndOTpr3aQcfSE3eF4DH33KlWPmoFEHr0sgR46kIjbEBH+PDmRsEB/fvP+Biqrqu0OySNooleqDueBRKYO0YFE3E378CD+OGkA6/fnMed7LeG4QhO9Uk6qjeG+d9YSqQOJuLXLBnbh0sTOPPv1TrYf1hJOYzTRK+VkX24xO48W8rQOJOL2Hr+iP+HBAdz/vnUuRZ2eJnqlHPJKyjlcUKoDiXiI6NZB/GnSADYeyGf2kky7w3FrmuiVwhpIJPNYESGt/HUgEQ9ySWJnJiZ14flFO9lysMDucNyWJnrl82oGEqmsqqZ3h9Y6kIiH+cPE/kSEBPKb99dTXqklnPpoolc+713HQCIxbUMJCwywOxx1hqLCAvnL5AFsPVTAPxdn2B2OW9JEr3za7uwi/vDxFkb2iqZzRLDd4aizNL5/J6akdOWFxRlsOpBvdzhuRxO98lnOA4n8v2uSkHpHvlSe4veX96dd60B+/d46issr7Q7HrWiiVz7JGMOfPtmiA4l4kYjQVvztqiQyjhZy79vrtOMzJ5rolU967tudvLZ8L7eO7qEDiXiRMee157GJ/flm6xEe+2gzxmiyB9AzT8rnzFu2m2e/2cmVg7rx25/1szsc1cRuGhHHgeMl/Ou7XXSLCuH2sb3sDsl2muiVT1m4NovHPt7CuISOPHllog7w7aUemhDPgbwS/vr5NrpEhnB5Uhe7Q7KVJnrlM77deoT739/AiJ7R/OPaFAK062Gv5ecnPH11EkdPlPGb99bTITyIYT2j7Q7LNvpOVz7hh105zHxzDQmd2zDnpsF6UZQPCG7lz5wbBxPTNoRfvL7ap/uv10SvvN6mA/nc9tpqukaFMG/6EB331YdEhgYyb/pQAgP8ufmVVRw9UWp3SLbQRK+82q5jhdz8ykrCgwN449ZhRLcOsjsk1cJi2obyyrRUcovKuWXeKorKfK+NvSZ65bUO5Zdw49yVAPzntmF0idS28r5qYLdIXrg+hS0HC7j7rTU+NzKVJnrllXKLyrlx7krySyp47Zah9
Grf2u6QlM0ujO/IHycNYPH2Y/zuQ99qY6+tbpTXKSyrZNqrK9mXW8zrtwxlQNcIu0NSbuL6Yd05cLyEF5dk0i0qhLsu6G13SC1CE73yKqUVVcx4fTWbDxbwrxsGM9yHm9Sp+t0/vi8H8kr425fb6RoZwqSUrnaH1Ow00SuvUVlVzb1vryUtM4e/X5PETxM62h2SckN+fsJTVw3kSEEpD8xfT4c2QYzs1c7usJqV1uiVVzDG8H8LNvLVliM8elkCUwZ1szsk5caCAvz51w2pxEWHcft/0tlxxLvb2LuU6EVkgohsF5EMEZlVz3wRkecd8zeIyCCneXtEZKOIrBOR1U0ZvFJgJfm/fLaV99OzuPeiPtwyuofdISkPEBHailenDyG4lT/TXlnJkQLvbWPfaKIXEX/gBeASIAG4VkQS6ix2CdDHcZsBvFRn/gXGmGRjTOq5h6zUyV5cksm/v9/NzSO686uf9rE7HOVBukWF8uq0IeSVVDD91VUUemkbe1eO6IcCGcaYXcaYcuAd4Io6y1wBvG4sK4BIEdG+X1Wze2PFXv725XauSO7C7y/vj4h2UqbOzICuEbx4/SC2HznBzDfXUOGFbexdSfRdgf1Oj7Mc01xdxgBfiUi6iMw43U5EZIaIrBaR1ceOHXMhLOXrPl5/kN99uIkL4zvw9NVJ2hOlOmvn9+3AnycN4Lsdx3hk4Sava2PvSqub+j49dV+FhpYZZYw5KCIdgK9FZJsx5rtTFjZmDjAHIDU11bteZdXk/rfjGL9+bx2p3aN44bpBtNKeKNU5+vnQWA7klfCPRRl0jQrh3ou8pwzoyqcjC4hxetwNOOjqMsaYmr9HgYVYpSClzlr63lzu+E86vTuE8/LNQwgJ1J4oVdP49bjzmJLSlb9/vYP56Vl2h9NkXEn0q4A+ItJDRAKBnwMf1VnmI+AmR+ub4UC+MeaQiISJSDiAiIQB44FNTRi/8jFbDxUw/dVVdGwTxOu3DCUiRHuiVE1HRHjiyoGM7BXNrA82sHRntt0hNYlGE70xphK4G/gS2Aq8Z4zZLCJ3iMgdjsU+A3YBGcC/gZmO6R2BpSKyHlgJfGqM+aKJn4PyEXtzirjplZWEBgbwn1uH0T5ce6JUTS8wwI/ZNw6mV/vW3PFGOlsPFdgd0jlz6cpYY8xnWMncedpsp/sGuKue9XYBSecYo1IcKSjlhrk/UFFVzVu3jyCmbajdISkv1ibYamM/+cVlTH91FQvvGknnCM/t/VTPYCm3l1dczk1zV5JTWM686UPp0zHc7pCUD+gSGcKr04ZSWFbJ9FdXUVBaYXdIZ00TvXJrxeWV3DJvFbuzi/j3Takkx0TaHZLyIQld2vDi9YPIOFrIzDfWUF7pmW3sNdErt7X1UAE3v7KSdfvzeP7aZEb19u6Op5R7GnNee/46JZGlGdn834KNHtnGXnuvVG5nf24xf/96B/9dd4DwoAD+fk0yEwbohdbKPlenxnAgr4Rnv9lJWJA/j16WQIAHXbuhiV65jezCMv65KIM3f9iLnwi3j+nFnWN7ERGqTSiV/e67qA9FZZX8+/vd7M8t5h/XDaJ1kGekUM+IUnm1E6UV/Pv73bz8/S7KKqu5JjWG+y7qQ6eIYLtDU6qWiPDbSxPoHh3G7z/azFUvpTF32hC6esBYxJrolW3KKqt4Y8U+XlicQW5ROZcmdubX48/T8V2VW7theHdi24Zy15truOKfy3j5ZvdvJOA5RSblNaqqDfPTs7jw6f/xx0+20K9zOB/eNYoXrh+kSV55hDHntWfBzJEEt/Jj6r+W89nGQ3aH1CBN9KrFGGP4ZssRfvbc99z//nrahgXyxq3DePO24SS5+RGRUnX16RjOf+8aRf8ubZj55hpeWJzhti1ytHSjWsSqPbk8+fk2Vu89To92YfzzuhR+NqCzdi2sPFq71kG89YvhPDh/A3/7cju7jhXx1ymJBAa41zG0JnrVrLYdLuBvX2zn221H6RAexJ8nD+Ca1
BjtVlh5jeBW/jz382R6tAvjuW93knW8mNk3DCYqLNDu0GppolfNYn9uMc98vYOF6w7QOiiAByf0ZfrIHtqlsPJKIsKvxp1Hj3ZhPDh/A1NeSmPuzan0dJNzTproVZOq2xZ+xpie3Dm2F5Gh7nN0o1RzmZTSlW5RIcz4TzqTX0xj9g2DGdEr2u6wNNGrpnGitIKXHW3hSyuruSa1G/dddJ62hVc+JzWuLf+dOYrp81Zy0ys/8OfJiVyTGtP4is1IE706J0Vllby7aj//dLSF/1liJ34zvq82k1Q+LTY6lAUzRzHzzXQenL+B3dlFPDC+r22NDzTRqzNWXW1YsTuH+elZfLHpMMXlVYzsFc1DE+K1maRSDhEhrZg3fSiPfriZl5Zksie7iL9fk2zLeSpN9Mple7KLWLAmiw/WHOBAXgnhQQFMTOrCVYO7Mbh7FCLaVFIpZ638/fjL5AH0ah/Gnz/bysE5y/n3Tal0aNOyJU1N9KpBJ0or+HTDIT5Yk8WqPccRgdG92/HghL6MT+ikrWiUaoSIcNtPehLbNpT73lnHpBeWMXfaEPp1btNiMWiiV6eoqjakZWYzPz2LLzcfprSimp7tw3hwQl8mp3T16CHVlLLL+P6deP+OEdz62iqueimNf1yXwoXxHVtk35roVa3MY4V8kJ7FwrUHOJRfSpvgAK4a3I0rB3UjOSZSSzNKnaMBXSP48K7R3PraKm57bTWPXpbAtFE9mn2/muh9XH5JBZ9sOMj89CzW7svDT2Dsee357aX9+Gm/jgS30tKMUk2pU0Qw790+gl++u47HPt7CruyiZh/IRBO9D6qsqub7jGw+SM/iqy1HKK+spm/HcB7+WTyTkru2+IkipXxNWFAAs28YzJNfbGPOd7vYm1PMP69LITy4eQbZ0UTvQ3YcOVFbmjl6ooyo0FZcNzSWqwZ3o3+XNlqaUaoF+fsJD/+sH3HRYfzuw01c9dJy5k5LpVtUaJPvSxO9Fyoqq+RQfgkH80o5mFfCgbwSvttxjPVZ+QT4CRfEd+DKQd24ML6D2/Wyp5SvuW5YLLFtQ7nzzXSmvJjG4vvPJ6yJhyjURO9hKquqOXqijEP5JRxwJPIfb6UczC8hr7jipHX8BPp1bsOjlyUwMbkL7VoH2RS9Uqo+o/u0Y+HMkazdl9fkSR400bsVYwwFJZUczP8xedck85oj9MMFpVRVnzy4QZvgALpEhtA1MoTB3aPoEhlCl8hgx98QOoYHedSI9Ur5ot4dwundIbxZtq2JvoVVVxuOnChld3YRe7KL2ZNTxO7sIvbmFHHgeAlF5VUnLd/KX+gcYSXuYT3b0iUipDaRd40MoXNkiMeMRK+UsodmiGZgjOFYYZmVyLOL2J1TZP3NLmJPThGlFdW1ywYG+NG9bSjdo8MY1bsdXR1H4Z0jrETernWQjsKklDonmujPkjGG3KJyxxH5yQl9b04xhWWVtcsG+AmxbUOJa2cl87h2YfSIDiOuXSidI0Lw10SulGpGmujrUVVtyCsu53hxOblFFeQWWfcP55eyx+novKD0x2Tu7yd0iwohLjqMIXFtiYu2EnuPdmF0jQzRGrlSyjYuJXoRmQA8B/gDLxtjnqgzXxzzfwYUA9OMMWtcWbe5GWM4UVZJbmE5ucXlHC8qr03cuUUV1uOa6cXWvPySCuobzF0EukSE0KNdGBOTuxAXbSXyHu3C6BYVqk0VlVJuqdFELyL+wAvAOCALWCUiHxljtjgtdgnQx3EbBrwEDHNx3SZhjOF3H24ityaRF1XUJvDK6nqyNtaJzrZhgUSFBtI2LJB+ndvQNjSQqLBA2oa2sv7WuQUFaJcASinP4soR/VAgwxizC0BE3gGuAJyT9RXA68YYA6wQkUgR6QzEubBukxARlu7Mxt9PiA4LIq5dKIPCImuTeO3fsEBHMm9F66AAvRpUKeX1XEn0XYH9To+zsI7aG1umq4vrAiAiM4AZALGxsS6EdaolD1xwVuspBUCnRLsja
BLxbePPeRtB/c59G8p9uJLo6zvkrVsLOd0yrqxrTTRmDjAHIDU1tf5ai1LN6ZIWPX3UbB4a+tA5b6PTww83QSTKXbiS6LMA5yHMuwEHXVwm0IV1lVJKNSNXmomsAvqISA8RCQR+DnxUZ5mPgJvEMhzIN8YccnFdpZRSzajRI3pjTKWI3A18idVE8hVjzGYRucMxfzbwGVbTygys5pXTG1q3WZ6JUkqpeompr8G4zVJTU83q1avtDkMppTyGiKQbY1Lrm6dX+CillJfTRK+UUl5OE71SSnk5TfRKKeXl3PJkrIgcA/Y24y7aAdnNuP2moDGeO3ePDzTGpqIxQndjTPv6Zrhlom9uIrL6dGen3YXGeO7cPT7QGJuKxtgwLd0opZSX00SvlFJezlcT/Ry7A3CBxnju3D0+0BibisbYAJ+s0SullC/x1SN6pZTyGZrolVLKy/lMoheRGBFZLCJbRWSziNxnd0x1iUiwiKwUkfWOGP9gd0ynIyL+IrJWRD6xO5b6iMgeEdkoIutExC17yHMMuTlfRLY53pcj7I7JmYj0dbx+NbcCEfml3XHVJSK/cnxeNonI2yISbHdMdYnIfY74NtvxGvpMjd4xhm1nY8waEQkH0oFJzTFQ+dkSawDbMGNMoYi0ApYC9xljVtgc2ilE5NdAKtDGGHOZ3fHUJSJ7gFRjjNteRCMirwHfG2NedozXEGqMybM7rvqIiD9wABhmjGnOixnPiIh0xfqcJBhjSkTkPeAzY8w8eyP7kYgMAN7BGn+7HPgCuNMYs7OlYvCZI3pjzCFjzBrH/RPAVqwxbd2GsRQ6HrZy3Nzum1hEugGXAi/bHYunEpE2wBhgLoAxptxdk7zDRUCmOyV5JwFAiIgEAKG43yh2/YAVxphiY0wl8D9gcksG4DOJ3pmIxAEpwA/2RnIqR0lkHXAU+NoY43YxAs8CDwLVdgfSAAN8JSLpjoHn3U1P4BjwqqME9rKIhNkdVAN+DrxtdxB1GWMOAE8D+4BDWKPbfWVvVKfYBIwRkWgRCcUapCmmkXWalM8lehFpDXwA/NIYU2B3PHUZY6qMMclY4+sOdfzscxsichlw1BiTbncsjRhljBkEXALcJSJj7A6ojgBgEPCSMSYFKAJm2RtS/RxlpYnA+3bHUpeIRAFXAD2ALkCYiNxgb1QnM8ZsBZ4EvsYq26wHKlsyBp9K9I669wfAm8aYBXbH0xDHz/glwASbQ6lrFDDRUQN/B7hQRN6wN6RTGWMOOv4eBRZi1UfdSRaQ5fSLbT5W4ndHlwBrjDFH7A6kHj8FdhtjjhljKoAFwEibYzqFMWauMWaQMWYMkAu0WH0efCjRO050zgW2GmP+bnc89RGR9iIS6bgfgvUm3mZvVCczxvyfMaabMSYO6+f8ImOMWx1BiUiY44Q7jnLIeKyfz27DGHMY2C8ifR2TLgLcpmFAHdfihmUbh33AcBEJdXzGL8I6/+ZWRKSD428sMIUWfj0bHRzci4wCbgQ2OmrgAA8bYz6zMaa6OgOvOVo4+AHvGWPcsvmim+sILLQ+9wQAbxljvrA3pHrdA7zpKI3sAqbbHM8pHDXlccDtdsdSH2PMDyIyH1iDVQ5Zi3t2h/CBiEQDFcBdxpjjLblzn2leqZRSvspnSjdKKeWrNNErpZSX00SvlFJeThO9Ukp5OU30Sinl5TTRK6WUl9NEr5RSXu7/A6PFQl+mz34rAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "dt = X_test\n", + "y_dt = y_test\n", + "\n", + "for _ in range(10):\n", + " i = randint(0, len(dt))\n", + "\n", + " plt.figure()\n", + " plt.title('{originalTitle} ({startYear:.00f}, {genres})'.format(**dt[i]))\n", + " preds = np.asarray([m.predict_proba([dt[i]])[0,1] for m in models])\n", + " preds = preds[1:] - preds[:-1]\n", + "\n", + " plt.plot(mids, preds, '-o', label='predicted distribution')\n", + " plt.plot([y_dt.iloc[i], y_dt.iloc[i]], [0, preds.max()], label='true rating')\n", + " plt.plot([(preds * mids).sum(), (preds * mids).sum()], [0, preds.max()], label='average prediction')\n", + " plt.plot([mids[preds.argmax()], mids[preds.argmax()]], [0, preds.max()], label='prediction mode')\n", + " plt.legend(loc='best')\n", + " plt.xlabel('Rating value')\n", + " plt.ylabel('Probabilty')\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Para pensar\n", + "\n", + "Pensando en el contexto de querer brindar una herramienta que guie la construccion de guiones\n", + "\n", + "* Nos da algún tipo de ventaja para construir un mejor producto esta técnica?\n", + "* Qué metrica offline podemos usar para evaluar este modelo?\n", + "* Habría una ventaja de utilizar un modelo basado en arboles?\n", + "* Para leer: Cómo se podría empaquetar este código en algo compatible con sklearn.linear_models.LogisticRegression? 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/clase-3/02-hipotesis-elenco.ipynb b/notebooks/clase-3/02-hipotesis-elenco.ipynb new file mode 100644 index 0000000..bae91ff --- /dev/null +++ b/notebooks/clase-3/02-hipotesis-elenco.ipynb @@ -0,0 +1,1221 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Boilerplate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from google.colab import drive\n", + "\n", + "drive.mount('/content/gdrive')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Si hay cambios en el repositorio, con esta linea actualizas tu drive\n", + "!cd /content/gdrive/My\\ Drive/ml-practico/code; git pull" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('/content/gdrive/My Drive/ml-practico/code')" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "# Para trabajar local\n", + "sys.path.append('/Users/przivic/prog/machine_learning_practico')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/przivic/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3263: DtypeWarning: 
Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " if (await self.run_code(code, result, async_=asy)):\n" + ] + } + ], + "source": [ + "from lib import data\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "\n", + "PATH = Path('../../data/')\n", + "ratings_df = data.load_title_ratings(PATH)\n", + "basics_df = data.load_title_basics(PATH)\n", + "principals_df = pd.read_csv(PATH / 'title.principals.tsv', sep='\\t')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Recordemos como era la data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tconstorderingnconstcategoryjobcharacters
0tt00000011nm1588970self\\N[\"Self\"]
1tt00000012nm0005690director\\N\\N
2tt00000013nm0374658cinematographerdirector of photography\\N
3tt00000021nm0721526director\\N\\N
4tt00000022nm1335271composer\\N\\N
\n", + "
" + ], + "text/plain": [ + " tconst ordering nconst category job \\\n", + "0 tt0000001 1 nm1588970 self \\N \n", + "1 tt0000001 2 nm0005690 director \\N \n", + "2 tt0000001 3 nm0374658 cinematographer director of photography \n", + "3 tt0000002 1 nm0721526 director \\N \n", + "4 tt0000002 2 nm1335271 composer \\N \n", + "\n", + " characters \n", + "0 [\"Self\"] \n", + "1 \\N \n", + "2 \\N \n", + "3 \\N \n", + "4 \\N " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "principals_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "actor 9428011\n", + "actress 7087536\n", + "self 6992466\n", + "writer 5443248\n", + "director 4739306\n", + "producer 2533985\n", + "cinematographer 1472541\n", + "composer 1461940\n", + "editor 1375324\n", + "production_designer 302528\n", + "archive_footage 245691\n", + "archive_sound 2605\n", + "Name: category, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "principals_df.category.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tconstorderingnconstcategoryjobcharacters
1025389tt012033810nm0365239editor\\N\\N
1025390tt01203381nm0000138actor\\N[\"Jack Dawson\"]
1025391tt01203382nm0000701actress\\N[\"Rose Dewitt Bukater\"]
1025392tt01203383nm0000708actor\\N[\"Cal Hockley\"]
1025393tt01203384nm0000870actress\\N[\"Molly Brown\"]
1025394tt01203385nm0000116director\\N\\N
1025395tt01203386nm0484457producerproducer\\N
1025396tt01203387nm0000035composer\\N\\N
1025397tt01203388nm0005665cinematographerdirector of photography\\N
1025398tt01203389nm0119322editor\\N\\N
\n", + "
" + ], + "text/plain": [ + " tconst ordering nconst category \\\n", + "1025389 tt0120338 10 nm0365239 editor \n", + "1025390 tt0120338 1 nm0000138 actor \n", + "1025391 tt0120338 2 nm0000701 actress \n", + "1025392 tt0120338 3 nm0000708 actor \n", + "1025393 tt0120338 4 nm0000870 actress \n", + "1025394 tt0120338 5 nm0000116 director \n", + "1025395 tt0120338 6 nm0484457 producer \n", + "1025396 tt0120338 7 nm0000035 composer \n", + "1025397 tt0120338 8 nm0005665 cinematographer \n", + "1025398 tt0120338 9 nm0119322 editor \n", + "\n", + " job characters \n", + "1025389 \\N \\N \n", + "1025390 \\N [\"Jack Dawson\"] \n", + "1025391 \\N [\"Rose Dewitt Bukater\"] \n", + "1025392 \\N [\"Cal Hockley\"] \n", + "1025393 \\N [\"Molly Brown\"] \n", + "1025394 \\N \\N \n", + "1025395 producer \\N \n", + "1025396 \\N \\N \n", + "1025397 director of photography \\N \n", + "1025398 \\N \\N " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "principals_df[principals_df.tconst == 'tt0120338']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distribución de cantidad de peliculas por nconst" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tconsttitleTypeprimaryTitleoriginalTitleisAdultstartYearendYearruntimeMinutesgenres
8tt0000009movieMiss JerryMiss Jerry01894.0\\N45.0[Romance]
144tt0000147movieThe Corbett-Fitzsimmons FightThe Corbett-Fitzsimmons Fight01897.0\\N20.0[Documentary, News, Sport]
498tt0000502movieBohemiosBohemios01905.0\\N100.0[no-genre]
570tt0000574movieThe Story of the Kelly GangThe Story of the Kelly Gang01906.0\\N70.0[Biography, Crime, Drama]
672tt0000679movieThe Fairylogue and Radio-PlaysThe Fairylogue and Radio-Plays01908.0\\N120.0[Adventure, Fantasy]
\n", + "
" + ], + "text/plain": [ + " tconst titleType primaryTitle \\\n", + "8 tt0000009 movie Miss Jerry \n", + "144 tt0000147 movie The Corbett-Fitzsimmons Fight \n", + "498 tt0000502 movie Bohemios \n", + "570 tt0000574 movie The Story of the Kelly Gang \n", + "672 tt0000679 movie The Fairylogue and Radio-Plays \n", + "\n", + " originalTitle isAdult startYear endYear \\\n", + "8 Miss Jerry 0 1894.0 \\N \n", + "144 The Corbett-Fitzsimmons Fight 0 1897.0 \\N \n", + "498 Bohemios 0 1905.0 \\N \n", + "570 The Story of the Kelly Gang 0 1906.0 \\N \n", + "672 The Fairylogue and Radio-Plays 0 1908.0 \\N \n", + "\n", + " runtimeMinutes genres \n", + "8 45.0 [Romance] \n", + "144 20.0 [Documentary, News, Sport] \n", + "498 100.0 [no-genre] \n", + "570 70.0 [Biography, Crime, Drama] \n", + "672 120.0 [Adventure, Fantasy] " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basics_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Nos vamos a quedar solo con principals de las peliculas que nos interesan\n", + "# Posiblemente de cosas diferentes \n", + "movie_principals_df = principals_df[principals_df.tconst.isin(set(basics_df.tconst.unique()))]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.10394285472418875" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(movie_principals_df) / len(principals_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Distribucion de cantidad de peliculas por persona" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Number of films per person')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + 
"data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "(\n", + " movie_principals_df\n", + " .nconst\n", + " .value_counts()\n", + " .value_counts(normalize=True)\n", + " .sort_index()\n", + " .plot(logx=True, logy=True, label='movies')\n", + ")\n", + "(\n", + " principals_df\n", + " .nconst\n", + " .value_counts()\n", + " .value_counts(normalize=True)\n", + " .sort_index()\n", + " .plot(logx=True, logy=True, label='general')\n", + ")\n", + "\n", + "plt.legend(loc='best')\n", + "plt.title('Number of films per person')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Relación con ratings\n", + "\n", + "Experimento: vamos a ver ejemplos claros" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "movie_principals_df = movie_principals_df.merge(ratings_df, on='tconst')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tconstorderingnconstcategoryjobcharactersaverageRatingnumVotes
0tt00000091nm0063086actress\\N[\"Miss Geraldine Holbrook (Miss Jerry)\"]5.9154
1tt00000092nm0183823actor\\N[\"Mr. Hamilton\"]5.9154
2tt00000093nm1309758actor\\N[\"Chauncey Depew - the Director of the New Yor...5.9154
3tt00000094nm0085156director\\N\\N5.9154
4tt00001471nm0179163self\\N[\"Self\"]5.2356
\n", + "
" + ], + "text/plain": [ + " tconst ordering nconst category job \\\n", + "0 tt0000009 1 nm0063086 actress \\N \n", + "1 tt0000009 2 nm0183823 actor \\N \n", + "2 tt0000009 3 nm1309758 actor \\N \n", + "3 tt0000009 4 nm0085156 director \\N \n", + "4 tt0000147 1 nm0179163 self \\N \n", + "\n", + " characters averageRating numVotes \n", + "0 [\"Miss Geraldine Holbrook (Miss Jerry)\"] 5.9 154 \n", + "1 [\"Mr. Hamilton\"] 5.9 154 \n", + "2 [\"Chauncey Depew - the Director of the New Yor... 5.9 154 \n", + "3 \\N 5.9 154 \n", + "4 [\"Self\"] 5.2 356 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movie_principals_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "person_metadata = (\n", + " movie_principals_df[movie_principals_df.numVotes > 1000]\n", + " .groupby('nconst')\n", + " .agg(\n", + " n_films=('tconst', 'count'), \n", + " avg_rating=('averageRating', 'mean'),\n", + " max_rating=('averageRating', 'max'),\n", + " min_rating=('averageRating', 'min'),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "cohort = (person_metadata.n_films > 10) & (person_metadata.n_films < 30)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "person_metadata[cohort].avg_rating.hist(bins=30, alpha=0.5, label='avg_rating')\n", + "person_metadata[cohort].max_rating.hist(bins=30, alpha=0.5, label='max_rating')\n", + "person_metadata[cohort].min_rating.hist(bins=30, alpha=0.5, label='min_rating')\n", + "\n", + "plt.legend(loc='best')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Parece haber señal, veamos ejemplos puntuales" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "person_metadata['link'] = (\n", + " person_metadata.index.to_series().apply(lambda nconst: f'link')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_filmsavg_ratingmax_ratingmin_ratinglink
nconst
nm0531431127.7333338.37.1link
nm0604335117.0454557.46.7link
nm0741013117.3636368.26.6link
nm0040916157.8733338.37.4link
nm1190372147.7000008.57.1link
nm0001425127.6250008.26.7link
nm0085038117.7545458.27.0link
nm0792670117.2363647.96.6link
nm4043111207.8050008.67.1link
nm0003593197.3210537.96.8link
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import HTML\n", + "\n", + "df = person_metadata[cohort].sort_values('min_rating', ascending=False)[:100].sample(10)\n", + "HTML(\n", + " person_metadata[cohort]\n", + " .sort_values('min_rating', ascending=False)[:100]\n", + " .sample(10)\n", + " .to_html(escape=False)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Hay mucha gente de principios del siglo 20. Vamos a filtrar por peliculas mas actuales" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "movie_principals_df = movie_principals_df.merge(basics_df, on='tconst')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "person_metadata = (\n", + " movie_principals_df[(movie_principals_df.numVotes > 1000) & (movie_principals_df.startYear > 1970)]\n", + " .groupby('nconst')\n", + " .agg(\n", + " n_films=('tconst', 'count'), \n", + " avg_rating=('averageRating', 'mean'),\n", + " max_rating=('averageRating', 'max'),\n", + " min_rating=('averageRating', 'min'),\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "cohort = (person_metadata.n_films > 10) & (person_metadata.n_films < 30)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXgAAAD4CAYAAADmWv3KAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAenElEQVR4nO3de3SU9bX/8fcmQAImWq75IXFB/BWqoOEWEEFdiZz+yjpWoFoUVmvRehoUqByqrXC6XOqqULTWHhGpxUulBaqopVJrT48gUwsFERWQm4IQJRWBQxWJECC4f39kyElgMpkkc8uTz2stVma+z23PQ2bPN9/nO/sxd0dERIKnVaoDEBGRxFCCFxEJKCV4EZGAUoIXEQkoJXgRkYBqneoAADp37uw9e/ZMdRhx8fnnn3PWWWelOoy0onMSmc5LZDovkUU6L2+++eb/uHuXurZJiwTfs2dP1q9fn+ow4iIUClFUVJTqMNKKzklkOi+R6bxEFum8mNkH0bbREI2ISEApwYuIBJQSvIhIQKXFGHwkJ06coKysjIqKilSH0iDnnHMO27ZtS3UYDZaVlUVeXh5t2rRJdSgiEidpm+DLysrIycmhZ8+emFmqw4nZ4cOHycnJSXUYDeLuHDx4kLKyMvLz81MdjojESdoO0VRUVNCpU6dmldybKzOjU6dOze6vJRGJLm0TPKDknkQ61yLBk9YJXkREGi9tx+BP94tX3ovr/qZ9tXdc9ycikm6aTYJvqUKhEG3btmXYsGEAPPbYY7Rv357vfOc7KY5MpA4rf1r3suIZyYtDlODTQWVlJa1bR/6vCIVCZGdnVyf4W265JZmhiUgzpgRfjzFjxrBnzx4qKiqYOnUqJ0+eZPfu3TzwwAMAPP3007z55ps88sgj/OQnP+G3v/0tPXr0oHPnzgwaNIg77rgj4n6LiooYNmwYq1evZtSoUfTu3Zv77ruP48eP06lTJxYtWsTRo0d57LHHyMjIYOHChTzyyCOsWLGC7Oxs7rjjDoqKirjkkktYuXIln376KU8++SSXX345R44c4cYbb2T79u1ceOGFlJaW8uijj1JYWJjMUyciKaYEX4+nnnqKjh07cvToUQYPHsyKFSsYPnx4dYJ/9tln+fGPf8z69et54YUXWLVqFe3atWPgwIEMGjQo6r4//fRT/vrXvwLwySefsHbtWsyMJ554ggceeICf//zn3HLLLdUJHWDFihW19lFZWcm6det4+eWXuffee1m+fDnz5s2jQ4cObNq0ic2bN9O/f/8EnBkRSXdK8PWYM2cOS5cuBWDPnj3s3r2b888/n7Vr19KrVy/effddhg8fzsMPP8zo0aNp164dOTk5XH311fXu+/rrr69+XFZWxvXXX8/evXs5fvx4zF84uuaaawAYNGgQpaWlAKxatYqpU6cCcNFFF1FQUNCQlywiAaFpklGEQiGWL1/OmjVr2LhxIwMGDKCiooLrr7+eJUuW8MILL/CNb3wDM8PdG7z/mrWdv//97zNlyhTeeecdfvWrX8X8paPMzEwAMjIyqKysBGhULCISPM2mB5+KaY2HDh2iQ4cOtG/fnu3bt7N27Vqgqtc8c+ZMevTowf333w/AZZddxsSJE5kyZQpmxp/+9Ce+973vNehY3bt3B2DBggXV7Tk5OXz22WcNivuyyy5jyZIlFBcXs3XrVt55550GbS8iwaAefBQjR46ksrKSgoIC7rrrLoYOHQpAhw4d6NOnDx988AFDhgwBYPDgwYwaNYphw4ZxzTXXUFhYyDnnnBPzse655x7Gjh3L5ZdfTufOnavbr776apYuXUr//v3529/+FtO+Jk2axIEDBygoKOD++++noKCgQbGISDA0mx58KmRmZvLnP/854rKXXnrpjLY77riD22+/nYyMDK644gpuv/32OvcdCoVqPR89ejSjR48+Y73evXuzadOm6ueXX355xH107ty5egw+KyuLhQsXkpWVxfv
vv8+IESPo0aNHnbGISDApwcdRSUkJmzdv5vjx40yYMIGBAwemJI4jR45QXFzMiRMncHd++ctf0rZt25TEIiKpowQfR4sXLz6jXPDkyZNZvXp1rfWmTp3KTTfdlLA4cnJyAnOPWxFpPCX4BHv00UdTHYIETLS6TKqxJDXpIquISEApwYuIBJQSvIhIQMU0Bm9mpcBh4CRQ6e6FZtYReBboCZQC17n7J+H1ZwA3h9e/zd3/0uRIo5UgbQyVLRWRgGtID77Y3fu7+6mShNOBFe7eC1gRfo6Z9QHGAX2BkcA8M8uIY8wtVmlpKYsXL65+vn79em677bYURiQi6awpQzSjgVPfqV8AjKnR/oy7H3P33cBOYEgTjtOinKonE8npCb6wsJA5c+YkIywRaYZinSbpwH+bmQO/cvf5QK677wVw971m1jW8bndgbY1ty8JttZhZCVACkJube8Y3O8855xwOHz5c/bzt8WMxhhqb4zX2XZcPPviAa665hksvvZQ33niDiy66iG9/+9vMmjWLAwcO8MQTTwAwffp0KioqyMrKYu7cuVxwwQXMnTuXrVu3Mm/ePLZs2cJ3v/tdVq5cSfv27c84zqxZs/j444/54IMP6NSpE3fffTclJSUcOXIEgAcffJBLLrmEH/7wh7z33nsUFBQwfvx4+vXrx5w5c3juueeYNWsWZWVllJaWUlZWxq233sqtt94KwP3338+SJUvIy8ujY8eODBgwIGLPv6Ki4oz/h3goLy9PyH6bu8ael+4Vdb8XQqGPmhBRnJRHqYQaw+vV70tkjTkvsSb44e7+UTiJv2Jm26OsaxHazihvGP6QmA9QWFjoRUVFtZZv27at1heGaJsZY6ixyay57zpkZ2eza9cuXnjhBfr27cvgwYP5wx/+wJo1a1i2bBkPP/wwv/nNb1i9ejWtW7dm+fLl3Hfffbz44ovceeedFBUVsXz5cmbOnMnjjz9Obm5u5FgyM9m0aVN1LfkjR47w6quvkpWVxY4dOxg/fjzr16/nZz/7GQ8++GB1mYRQKETr1q3JyckhMzOT999/n5UrV3L48GG+8pWvMG3aNDZu3MhLL73Exo0bqaysZODAgQwdOrT2uQ3LyspiwIABTTuxEYRCIU7//5XGn5do8+CvK0qDefDRrpcVjat3u5DnU+SnFcjTNbNG/b7ElODd/aPwz/1mtpSqIZd9ZtYt3HvvBuwPr14GnFdj8zwgDboVjZOfn8/FF18MQN++fRkxYgRmxsUXX0xpaSmHDh1iwoQJ7NixAzPj2LGq3lWrVq14+umnKSgoYOLEiQwfPjzqcUaNGkW7du0AOHHiBFOmTGHDhg1kZGTw3nux3XD8qquuIjMzk8zMTLp27cq+fftYtWpVdZ16IKY69SISDPWOwZvZWWaWc+ox8P+AzcAyYEJ4tQnAi+HHy4BxZpZpZvlAL2BdvANPllP11qEqaZ963qpVKyorK7nrrrsoLi5m8+bN/PGPf6xO8AA7duwgOzubjz6q//OtZm34X/ziF+Tm5rJx40bWr1/P8ePHGxzrqfrwqg0v0nLF0oPPBZaa2an1F7v7f5nZG8ASM7sZ+BAYC+DuW8xsCbAVqAQmu/vJJkeapn+i1azj/vTTT9dqnzp1Kq+99hpTpkzh+eef55vf/GbM+8zLy6NVq1YsWLCAkyerTl9OTk6t6xKxOFWnfsaMGVRWVja4Tr2INF/1Jnh33wX0i9B+EBhRxzYzgZlNjq4Z+NGPfsSECRN46KGHuPLKK6vbp02bxqRJk+jduzdPPvkkxcXFXHHFFXTt2jXK3qpMmjSJa6+9lueee47i4uLq3n1BQQGtW7emX79+3HjjjTGNl5+qU9+vXz969OjR4Dr1ItJ8WTr8CV9YWOinVz/ctm0bF154YYoiarzTq0mmg/LycrKzszly5AhXXHEF8+fPj1jKOFHnXBdZI0vERda0KDYW7SJrtL/ET11kLc+nKHt37Nu1EJF+X8zszRrfTTqDqkm
2ACUlJWzdupWKioqU1qkXkeRSgk+iX//61zz88MO12oYPH57wksI1vxwlIi2HEnwS3XTTTQm90YeISE1K8CIplPbj6dKsqVywiEhAKcGLiARUsxmimbdhXlz3N6n/pLjuT0Qk3agH30TLli1j9uzZCT3GrFmzaj0fNmxYQo8nIsGgBN9Eo0aNYvr06U3ax6lSBHU5PcH//e9/b9LxRKRlaDZDNKlQWlrKyJEjueyyy1i7di39+vXjpptu4u6772b//v0sWrSIrVu3sn79eubOncuNN95IVlYWmzZt4uOPP+aBBx6os/5MKBTi3nvvpVu3bmzYsIGtW7cyZswY9uzZQ0VFBVOnTqWkpITp06dz9OhR+vfvT9++fVm0aBHZ2dnVtaHvueceOnfuzObNmxk0aBALFy7EzHj55Zf5wQ9+QOfOnRk4cCC7du2qLjMszUO0GTYisVCCr8fOnTt57rnnmD9/PoMHD2bx4sWsWrWKZcuWMWvWLMaMGVNr/VMlerdv386oUaOiFhhbt24dmzdvJj+/6gYJTz31FB07duTo0aMMHjyYa6+9ltmzZzN37lw2bNgQcR9vv/02W7Zs4dxzz2X48OGsXr2awsJCJk6cyGuvvUZ+fj7jx4+P3wkRkWZDCb4e9dWDP91VV11Fq1at6NOnD/v27Yu67yFDhlQnd4A5c+awdOlSAPbs2cOOHTvo1KlTvfvIy8sDoH///pSWlpKdnc35559fve/x48czf/78mF+zNF9pP68+Wp0aiTsl+HrUVw8+2vr1FXKrWQM+FAqxfPly1qxZQ/v27SkqKqKioqJB8akGvIjU1GwSfNCnNR46dIgOHTrQvn17tm/fztq1/3tb2zZt2nDixAnatGkT074uuOACdu3aRWlpKT179uTZZ59NVNgiksY0iyZNjBw5ksrKSgoKCrjrrrsYOnRo9bKSkhIKCgr41re+FdO+2rVrx7x586ovEOfm5qoGvEgLpHrwcZYu9eBP1YB3dyZPnkyvXr2YNm1a1G1UDz65QqEQb584N2nHi+sYfALH0lUPPrLG1INXDz6gHn/88eqplYcOHWLixImpDklEkqzZjME3V++88w433HBDrbbMzExef/31hB532rRp9fbYRSTY0jrBuzvhm303WxdffHGdc9jTSToM1YlIfKXtEE1WVhYHDx5U4kkCd+fgwYNkZWWlOhQRiaO07cHn5eVRVlbGgQMHUh1Kg1RUVDTLRJmVlVX9hSkRCYa0TfBt2rSp9S3P5iIUCjFgwIBUhyEikr5DNCIi0jRK8CIiAaUELyISUErwIiIBpQQvIhJQaTuLRkRSTLXbm72Ye/BmlmFmb5vZS+HnHc3sFTPbEf7Zoca6M8xsp5m9a2ZfS0TgIiISXUOGaKYC22o8nw6scPdewIrwc8ysDzAO6AuMBOaZWUZ8whURkVjFlODNLA+4CniiRvNoYEH48QJgTI32Z9z9mLvvBnYCQ+ITroiIxCrWMfj/BH4E1Cx0nuvuewHcfa+ZdQ23dwfW1livLNxWi5mVACUAubm5hEKhhkWepsrLywPzWuIl1efkwNHo5S66tOuSpEhqKy8vp7vvrn/FOAmFPmrYBuWp+SZ5+ReZhE4/tt5TjXof1ZvgzezrwH53f9PMimLYZ6Tyj2dUDHP3+cB8qLrhR1BuCKGbW5wp1edk3oZ5UZeP7T82SZHUFgqF2JHEG35cV9TAG36k6CJrxBt+FI1LSSzppDHvo1h68MOBUWb2r0AWcLaZLQT2mVm3cO+9G7A/vH4ZcF6N7fOABnYdRESkqeodg3f3Ge6e5+49qbp4+qq7fxtYBkwIrzYBeDH8eBkwzswyzSwf6AWsi3vkIiISVVPmwc8GlpjZzcCHwFgAd99iZkuArUAlMNndTzY5UhERaZAGJXh3DwGh8OODwIg61psJzGxibCIi0gQqVSAiElBK8CIiAaUELyISUErwIiIBpQQvIhJQSvAiIgGlBC8iElBK8CIiAaUELyI
SUErwIiIBpQQvIhJQSvAiIgGlBC8iElBK8CIiAaUELyISUErwIiIBpQQvIhJQSvAiIgGlBC8iElBNuem2iMTgF6+8F7G9e8UxyEpyMNKiqAcvIhJQSvAiIgGlBC8iElBK8CIiAaUELyISUErwIiIBpQQvIhJQmgcv0kLUNR9/2ld7JzkSSZZ6e/BmlmVm68xso5ltMbN7w+0dzewVM9sR/tmhxjYzzGynmb1rZl9L5AsQEZHIYhmiOQZc6e79gP7ASDMbCkwHVrh7L2BF+Dlm1gcYB/QFRgLzzCwjEcGLiEjd6k3wXqU8/LRN+J8Do4EF4fYFwJjw49HAM+5+zN13AzuBIXGNWkRE6hXTRVYzyzCzDcB+4BV3fx3Idfe9AOGfXcOrdwf21Ni8LNwmIiJJFNNFVnc/CfQ3sy8BS83soiirW6RdnLGSWQlQApCbm0soFIollLRXXl4emNcSL6k+J12Odom6PPRpKKHH715xLGJ7my+O0b1id0KPHYtQ6KPIC8rzkxvIqcN+kUno9GPrPdWo91GDZtG4+6dmFqJqbH2fmXVz971m1o2q3j1U9djPq7FZHnDGb5C7zwfmAxQWFnpRUVGDAk9XoVCIoLyWeEn1OZm3YV7U5WP7j03o8euuJrmbf2SlJonWdF1RHbNoVv40uYGEhcrzKco+7YOvaFxKYkknjXkf1ZvgzawLcCKc3NsB/wLcDywDJgCzwz9fDG+yDFhsZg8B5wK9gHUNikqkAepL4CItVSw9+G7AgvBMmFbAEnd/yczWAEvM7GbgQ2AsgLtvMbMlwFagEpgcHuIREZEkqjfBu/smYECE9oPAiDq2mQnMbHJ0IiLSaCpVICISUCpVICLpL9oF3+IZyYujmVEPXkQkoNSDF4mivhk6k/pPSlIkIg2nHryISEApwYuIBJQSvIhIQCnBi4gElBK8iEhAaRaNNAuJrDejWjYSVOrBi4gElHrwkhbUixaJP/XgRUQCSgleRCSglOBFRAJKCV5EJKB0kVUaJNrFUBXeaqZSdO9VSTz14EVEAkoJXkQkoJTgRUQCSgleRCSglOBFRAJKCV5EJKCU4EVEAkoJXkQkoJTgRUQCSgleRCSgVKpApAlUukHSmXrwIiIBVW+CN7PzzGylmW0zsy1mNjXc3tHMXjGzHeGfHWpsM8PMdprZu2b2tUS+ABERiSyWIZpK4HZ3f8vMcoA3zewV4EZghbvPNrPpwHTgTjPrA4wD+gLnAsvNrLe7n0zMSxBJD2veP1jr+bED76UoEpEq9fbg3X2vu78VfnwY2AZ0B0YDC8KrLQDGhB+PBp5x92PuvhvYCQyJd+AiIhKduXvsK5v1BF4DLgI+dPcv1Vj2ibt3MLO5wFp3XxhufxL4s7s/f9q+SoASgNzc3EHPPPNME19KeigvLyc7OzvVYSTMgaMH6lzWpV2XiO2xnJNo+20uyo9V1np+VkbHqOu3+eIYJ1plJjKkmHTlk1SHUEv5F5lktzoW+wY5/ydxwaSRSO+j4uLiN929sK5tYp5FY2bZwAvAv7v7Z2ZW56oR2s74FHH3+cB8gMLCQi8qKoo1lLQWCoUIymuJJNqskbH9x0Zsj+WcRNtvc3H6EM3As66Pun73it38Iys/kSHF5LrWb6U6hFpC5fkUZe+OfYOicYkLJo00JrfENIvGzNpQldwXufvvw837zKxbeHk3YH+4vQw4r8bmecBHDYpKRESaLJZZNAY8CWxz94dqLFoGTAg/ngC8WKN9nJllmlk+0AtYF7+QRUQkFrEM0QwHbgDeMbMN4bb/AGYDS8zsZuBDYCyAu28xsyXAVqpm4EzWDBoRkeSrN8G7+yoij6sDjKhjm5nAzCbEJSJJsmbXwTqXXXp+pyRGIvGmb7KKiASUEryISECp2JiING8rf1r3suIZyYsjDakHLyISUErwIiIBpQQvIhJQSvAiIgGlBC8iElBK8CIiAaUELyISUErwIiIBpQQvIhJQ+iarxE1dN+3ocrQ
L8zbMY1L/SUmOSKRlU4KXpAnCXZtEmhMleJEEeeuzZ6Mu795W96KXxFKCF2kBhn44P9UhSAoowYs0wOk31hZJZ5pFIyISUErwIiIBpQQvIhJQSvAiIgGlBC8iElBK8CIiAaVpkiIp8vnJf/LWZ+siLht49vVJjkaCSD14EZGAUg9ealG9GJHgUA9eRCSg1INvYdRDF2k51IMXEQmoenvwZvYU8HVgv7tfFG7rCDwL9ARKgevc/ZPwshnAzcBJ4DZ3/0tCIhcJsPpKDWuWjcQiliGap4G5wG9qtE0HVrj7bDObHn5+p5n1AcYBfYFzgeVm1tvdT8Y3bBFJhjW76q6eeen5nZIYiTRGvQne3V8zs56nNY8GisKPFwAh4M5w+zPufgzYbWY7gSHAmviEK5J4KgksQdHYi6y57r4XwN33mlnXcHt3YG2N9crCbWcwsxKgBCA3N5dQKNTIUNJLeXl5Wr+WLke7JP2YrU+0psve5B+3sa7M6JCU45xNJldmfLlR255VsbtB6//znP6NOk40ofLEzNEo/yKTUHl+fHaWxu/FhmpMbon3/5BFaPNIK7r7fGA+QGFhoRcVFcU5lNQIhUKk82tJxSyaLnu7cKDbgaQft7GS1YO/MuPLvHpyZ6O2HXhWw8bgh+5/pVHHiSZRQzSh8nyKshv2AVanonHx2U8aaExuaWyC32dm3cK9927A/nB7GXBejfXygI8aeQwRaQDdlk9O19gEvwyYAMwO/3yxRvtiM3uIqousvYDIxTZERBJt5U/rXlY8I3lxpEgs0yR/R9UF1c5mVgbcTVViX2JmNwMfAmMB3H2LmS0BtgKVwGTNoImsvqGSSf0nJSkSEQmqWGbRjK9j0Yg61p8JzGxKUCIi0nT6JquISECpFk0Aqd6MiIB68CIigaUELyISUBqiaYY0BCMisVAPXkQkoJTgRUQCSkM00mKpaqQEnXrwIiIBpQQvIhJQSvAiIgGlBC8iElC6yJogmqsuIqmmHryISEApwYuIBJSGaCTQNNddWjL14EVEAko9eJFmJO+zNwEY+umhFEcSAHXdrzVA92pVgm+CSDNluhztEpcZNJqFI+luza7Iw1+Xnt8pyZFIXTREIyISUErwIiIBpSEaafZa4kyZZa12Rl0+6osvJymSM9U1dAMavkk29eBFRAJKPXiRNHNqpoxIUynBi4jUVNf0SWh2UyiV4KPQVMX00hLH2kWaQglepAmiDaeUnT0oiZHUFu0irC7ANkEz690rwUvSReuJX/p/m8GbPEb1jaW3PftcjbdLQinBS5PEMmxyZUYHDa+IpEDCEryZjQQeBjKAJ9x9dqKO1RQaZ08vqfwgSNfhlnirbw59NKkc3pGGS0iCN7MM4FHgq0AZ8IaZLXP3rYk4npJ0emt78kidyTNIiVOa5tT4/OfnnMea/f/7Qd8sxubTVKJ68EOAne6+C8DMngFGAwlJ8EGWzB5tXePfscQQdSz57HMbtV1jk38ixrU1Vl4lFd+gbTYXZqNdgI0mgRdnzd3jv1OzbwIj3f3fws9vAC5x9yk11ikBSsJPvwK8G/dAUqMz8D+pDiLN6JxEpvMSmc5LZJHOSw9371LXBonqwVuEtlqfJO4+H5ifoOOnjJmtd/fCVMeRTnROItN5iUznJbLGnJdE1aIpA86r8TwP+ChBxxIRkQgSleDfAHqZWb6ZtQXGAcsSdCwREYkgIUM07l5pZlOAv1A1TfIpd9+SiGOlocANO8WBzklkOi+R6bxE1uDzkpCLrCIiknqqBy8iElBK8CIiAaUEHwdmdp6ZrTSzbWa2xcympjqmdGJmGWb2tpm9lOpY0oWZfcnMnjez7eHfm0tTHVM6MLNp4ffQZjP7nZllpTqmVDCzp8xsv5ltrtHW0cxeMbMd4Z8d6tuPEnx8VAK3u/uFwFBgspn1SXFM6WQqsC3VQaSZh4H/cvcLgH7o/GBm3YHbgEJ3v4iqCRrjUhtVyjwNjDytbTqwwt17ASvCz6N
Sgo8Dd9/r7m+FHx+m6s3aPbVRpQczywOuAp5IdSzpwszOBq4AngRw9+Pu/mlqo0obrYF2ZtYaaE8L/f6Mu78G/PO05tHAgvDjBcCY+vajBB9nZtYTGAC8ntpI0sZ/Aj8Cvkh1IGnkfOAA8Ovw0NUTZnZWqoNKNXf/B/Ag8CGwFzjk7v+d2qjSSq6774WqTiXQtb4NlODjyMyygReAf3f3z1IdT6qZ2deB/e6uSl21tQYGAr909wHA58Tw53bQhceURwP5wLnAWWb27dRG1bwpwceJmbWhKrkvcvffpzqeNDEcGGVmpcAzwJVmtjC1IaWFMqDM3U/9lfc8VQm/pfsXYLe7H3D3E8DvgWEpjimd7DOzbgDhn/vr20AJPg7MzKgaT93m7g+lOp504e4z3D3P3XtSdbHsVXdv8T0yd/8Y2GNmXwk3jUCltKFqaGaombUPv6dGoIvPNS0DJoQfTwBerG8D3bIvPoYDNwDvmNmGcNt/uPvLKYxJ0tv3gUXhWk27gJtSHE/KufvrZvY88BZVM9PepoWWLTCz3wFFQGczKwPuBmYDS8zsZqo+DMfWux+VKhARCSYN0YiIBJQSvIhIQCnBi4gElBK8iEhAKcGLiASUEryISEApwYuIBNT/B3VWv4ObTzLEAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "person_metadata[cohort].avg_rating.hist(bins=30, alpha=0.5, label='avg_rating')\n", + "person_metadata[cohort].max_rating.hist(bins=30, alpha=0.5, label='max_rating')\n", + "person_metadata[cohort].min_rating.hist(bins=30, alpha=0.5, label='min_rating')\n", + "\n", + "plt.legend(loc='best')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "person_metadata['link'] = (\n", + " person_metadata.index.to_series().apply(lambda nconst: f'link')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_filmsavg_ratingmax_ratingmin_ratinglink
nconst
nm0062362177.1117658.16.3link
nm0857621187.4277788.06.7link
nm0311508197.2210538.16.6link
nm0501921117.9818188.67.1link
nm0598971117.3727278.06.6link
nm0160883147.3142867.96.3link
nm3763301127.0500007.46.6link
nm0316795217.3904768.06.4link
nm2953454117.9090918.66.9link
nm0858128138.3000009.07.5link
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = person_metadata[cohort].sort_values('min_rating', ascending=False)[:100].sample(10)\n", + "HTML(\n", + " person_metadata[cohort]\n", + " .sort_values('min_rating', ascending=False)[:100]\n", + " .sample(10)\n", + " .to_html(escape=False)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/clase-3/03-stars-features.ipynb b/notebooks/clase-3/03-stars-features.ipynb new file mode 100644 index 0000000..ef8cfbb --- /dev/null +++ b/notebooks/clase-3/03-stars-features.ipynb @@ -0,0 +1,1194 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import make_pipeline, make_union\n", + "from sklearn.feature_extraction import DictVectorizer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import roc_auc_score\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('/Users/przivic/prog/machine_learning_practico')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from lib import data, transformers\n", + "from lib.model import 
get_features_pipe, get_model_pipe" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading title basics...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/przivic/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3343: DtypeWarning: Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading title ratings...\n", + "Loading movie directors...\n", + "Merging everything...\n" + ] + } + ], + "source": [ + "PATH = Path('../../data/')\n", + "movies_df = data.load_data(PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "principals_df = pd.read_csv(PATH / 'title.principals.tsv', sep='\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tconstorderingnconstcategoryjobcharacters
0tt00000011nm1588970self\\N[\"Self\"]
1tt00000012nm0005690director\\N\\N
2tt00000013nm0374658cinematographerdirector of photography\\N
3tt00000021nm0721526director\\N\\N
4tt00000022nm1335271composer\\N\\N
\n", + "
" + ], + "text/plain": [ + " tconst ordering nconst category job \\\n", + "0 tt0000001 1 nm1588970 self \\N \n", + "1 tt0000001 2 nm0005690 director \\N \n", + "2 tt0000001 3 nm0374658 cinematographer director of photography \n", + "3 tt0000002 1 nm0721526 director \\N \n", + "4 tt0000002 2 nm1335271 composer \\N \n", + "\n", + " characters \n", + "0 [\"Self\"] \n", + "1 \\N \n", + "2 \\N \n", + "3 \\N \n", + "4 \\N " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "principals_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "actor 9428011\n", + "actress 7087536\n", + "self 6992466\n", + "writer 5443248\n", + "director 4739306\n", + "producer 2533985\n", + "cinematographer 1472541\n", + "composer 1461940\n", + "editor 1375324\n", + "production_designer 302528\n", + "archive_footage 245691\n", + "archive_sound 2605\n", + "Name: category, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "principals_df.category.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Sacado del codigo de directores\n", + "\n", + "movies_stars = principals_df[principals_df.category.isin(['actress', 'actor'])].copy()\n", + "\n", + "# Calculo un ranking por pelicula segun el ordering\n", + "movies_stars['star_rank'] = (\n", + " movies_stars.sort_values('ordering')\n", + " .groupby('tconst')\n", + " .cumcount()\n", + ")\n", + "\n", + "first_star = movies_stars[movies_stars.star_rank == 0][['nconst', 'tconst']].rename(columns={'nconst': '1st_star'})\n", + "second_star = movies_stars[movies_stars.star_rank == 1][['nconst', 'tconst']].rename(columns={'nconst': '2nd_star'})\n", + "third_star = movies_stars[movies_stars.star_rank == 2][['nconst', 'tconst']].rename(columns={'nconst': '3rd_star'})" + 
] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "stars_df = (\n", + " first_star.merge(second_star, how='left', on='tconst')\n", + " .merge(third_star, how='left', on='tconst')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
1st_startconst2nd_star3rd_star
0nm0443482tt0000005nm0653042NaN
1nm0179163tt0000007nm0183947NaN
2nm0653028tt0000008NaNNaN
3nm0063086tt0000009nm0183823nm1309758
4nm3692297tt0000011NaNNaN
\n", + "
" + ], + "text/plain": [ + " 1st_star tconst 2nd_star 3rd_star\n", + "0 nm0443482 tt0000005 nm0653042 NaN\n", + "1 nm0179163 tt0000007 nm0183947 NaN\n", + "2 nm0653028 tt0000008 NaN NaN\n", + "3 nm0063086 tt0000009 nm0183823 nm1309758\n", + "4 nm3692297 tt0000011 NaN NaN" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stars_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
1st_startconst2nd_star3rd_star
107363nm0000138tt0120338nm0000701nm0000708
\n", + "
" + ], + "text/plain": [ + " 1st_star tconst 2nd_star 3rd_star\n", + "107363 nm0000138 tt0120338 nm0000701 nm0000708" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stars_df[stars_df.tconst == 'tt0120338']" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "movies_df = movies_df.merge(stars_df, on='tconst', how='left')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tconsttitleTypeprimaryTitleoriginalTitleisAdultstartYearendYearruntimeMinutesgenresaverageRatingnumVotesdirector1st_star2nd_star3rd_star
0tt0000009movieMiss JerryMiss Jerry01894.0\\N45.0[Romance]5.9154nm0085156nm0063086nm0183823nm1309758
1tt0000147movieThe Corbett-Fitzsimmons FightThe Corbett-Fitzsimmons Fight01897.0\\N20.0[Documentary, News, Sport]5.2356nm0714557NaNNaNNaN
2tt0000502movieBohemiosBohemios01905.0\\N100.0[no-genre]3.86nm0063413nm0215752nm0252720NaN
3tt0000574movieThe Story of the Kelly GangThe Story of the Kelly Gang01906.0\\N70.0[Biography, Crime, Drama]6.1589nm0846879nm0846887nm0846894nm3002376
4tt0000679movieThe Fairylogue and Radio-PlaysThe Fairylogue and Radio-Plays01908.0\\N120.0[Adventure, Fantasy]5.237nm0091767nm0000875nm0122665nm0933446
\n", + "
" + ], + "text/plain": [ + " tconst titleType primaryTitle \\\n", + "0 tt0000009 movie Miss Jerry \n", + "1 tt0000147 movie The Corbett-Fitzsimmons Fight \n", + "2 tt0000502 movie Bohemios \n", + "3 tt0000574 movie The Story of the Kelly Gang \n", + "4 tt0000679 movie The Fairylogue and Radio-Plays \n", + "\n", + " originalTitle isAdult startYear endYear runtimeMinutes \\\n", + "0 Miss Jerry 0 1894.0 \\N 45.0 \n", + "1 The Corbett-Fitzsimmons Fight 0 1897.0 \\N 20.0 \n", + "2 Bohemios 0 1905.0 \\N 100.0 \n", + "3 The Story of the Kelly Gang 0 1906.0 \\N 70.0 \n", + "4 The Fairylogue and Radio-Plays 0 1908.0 \\N 120.0 \n", + "\n", + " genres averageRating numVotes director 1st_star \\\n", + "0 [Romance] 5.9 154 nm0085156 nm0063086 \n", + "1 [Documentary, News, Sport] 5.2 356 nm0714557 NaN \n", + "2 [no-genre] 3.8 6 nm0063413 nm0215752 \n", + "3 [Biography, Crime, Drama] 6.1 589 nm0846879 nm0846887 \n", + "4 [Adventure, Fantasy] 5.2 37 nm0091767 nm0000875 \n", + "\n", + " 2nd_star 3rd_star \n", + "0 nm0183823 nm1309758 \n", + "1 NaN NaN \n", + "2 nm0252720 NaN \n", + "3 nm0846894 nm3002376 \n", + "4 nm0122665 nm0933446 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ahora vamos a experimentar!\n", + "\n", + "Como podemos hacer para usar 1st_star y 2nd_star con el código que **ya** tenemos? 
[Miremos el diff](https://github.com/elsonidoq/machine_learning_practico/commit/1244da3daee2f7aff140d202885e6e8dba55c099)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "rating_data = data.load_rating_train_dev_test(movies_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " transformers.CrewFeatures('1st_star', min_cnt_movies=3),\n", + " DictVectorizer(sparse=False),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.6694917991958195, 0.5516081278474014)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " make_union(\n", + " make_pipeline(transformers.CrewFeatures('1st_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('2nd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('3rd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " ),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.7266249341879818, 0.5782582569516197)" + ] + }, + "execution_count": 34, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Probando todo junto" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " make_union(\n", + " make_pipeline(transformers.CrewFeatures('1st_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('2nd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('3rd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('director', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.YearsAgo(), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.GenreDummies(), DictVectorizer(sparse=False)),\n", + " ),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.8433724090930335, 0.7368580621136858)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + 
"outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " make_union(\n", + " make_pipeline(transformers.CrewFeatures('director', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.YearsAgo(), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.GenreDummies(), DictVectorizer(sparse=False)),\n", + " ),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.8189300131210699, 0.7315217582213612)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " make_union(\n", + " make_pipeline(transformers.YearsAgo(), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.GenreDummies(), DictVectorizer(sparse=False)),\n", + " ),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.7419233952069703, 0.7288300431976487)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + 
"\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "pipe = make_pipeline(\n", + " make_union(\n", + " make_pipeline(transformers.CrewFeatures('1st_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('2nd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('3rd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('director', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.YearsAgo(), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.GenreDummies(), DictVectorizer(sparse=False)),\n", + " ),\n", + " GradientBoostingClassifier(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.9175962044769267, 0.7461476500702391)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Word2Vec features" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "class EpochSaver: pass\n", + "\n", + "from gensim.models import Word2Vec\n", + "\n", + "w2v = Word2Vec.load('../../data/w2v/epoch_10')" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(100,)" 
+ ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "default_vector = np.mean(w2v.wv.vectors, axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODO: revisar a quién se parece este default_vector\n", + "\n", + "¿Sería mejor un vector de 0s? ¿Alguna otra agregación sobre los datos?" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "x_i = rating_data['X_train'][0]\n", + "fields = ['1st_star', '2nd_star', '3rd_star', 'director']\n", + "min_cnt_movies = 2\n", + "\n", + "vectors = []\n", + "for field in fields:\n", + " person_id = x_i[field]\n", + " if person_id not in w2v.wv or w2v.wv.vocab[person_id].count < min_cnt_movies: continue\n", + " \n", + " vectors.append(w2v.wv[person_id])\n", + "\n", + "if len(vectors) == 0:\n", + " result = default_vector\n", + "else:\n", + " result = np.mean(vectors, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.base import BaseEstimator, TransformerMixin\n", + "\n", + "class W2VCrewFeatures(BaseEstimator, TransformerMixin):\n", + " def __init__(self, w2v, fields, min_cnt_movies=2):\n", + " self.fields = fields\n", + " self.min_cnt_movies = min_cnt_movies\n", + " self.w2v = w2v\n", + "\n", + " def fit(self, X, y):\n", + " self.default_vector_ = np.mean(w2v.wv.vectors, axis=0)\n", + " return self\n", + " \n", + " def _get_movie_vector(self, x_i):\n", + " vectors = []\n", + " for field in self.fields:\n", + " person_id = x_i[field]\n", + " if person_id not in self.w2v.wv or self.w2v.wv.vocab[person_id].count < self.min_cnt_movies: continue\n", + "\n", + " vectors.append(self.w2v.wv[person_id])\n", + "\n", + " if len(vectors) == 0:\n", + " return self.default_vector_\n", + " else:\n", + " return np.mean(vectors, axis=0)\n", + " \n", + " def 
transform(self, X):\n", + " return np.asarray([self._get_movie_vector(x_i) for x_i in X])" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " W2VCrewFeatures(w2v, ['1st_star', '2nd_star', '3rd_star']),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/przivic/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:760: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "data": { + "text/plain": [ + "(0.7050786145493632, 0.6466820465336702)" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " W2VCrewFeatures(w2v, ['1st_star', '2nd_star', '3rd_star', 'director']),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "/Users/przivic/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:760: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "data": { + "text/plain": [ + "(0.7259572017485352, 0.6663948172895163)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " make_union(\n", + " W2VCrewFeatures(w2v, ['1st_star', '2nd_star', '3rd_star']),\n", + " W2VCrewFeatures(w2v, ['director'])\n", + " ),\n", + " StandardScaler(),\n", + " LogisticRegression()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/przivic/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py:760: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "data": { + "text/plain": [ + "(0.733849182294092, 0.6649772550701968)" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "pipe = make_pipeline(\n", + " make_union(\n", + " W2VCrewFeatures(w2v, ['1st_star', '2nd_star', '3rd_star']),\n", + " make_pipeline(transformers.CrewFeatures('1st_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('2nd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " make_pipeline(transformers.CrewFeatures('3rd_star', min_cnt_movies=3), DictVectorizer(sparse=False)),\n", + " ),\n", + " GradientBoostingClassifier()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipe.fit(rating_data['X_train'], rating_data['y_train'] > 7.5)\n", + "\n", + "tr_auc = roc_auc_score(rating_data['y_train'] > 7.5, pipe.predict_proba(rating_data['X_train'])[:, 1])\n", + "dev_auc = roc_auc_score(rating_data['y_dev'] > 7.5, pipe.predict_proba(rating_data['X_dev'])[:, 1])\n", + "\n", + "tr_auc, dev_auc" + ] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/clase-3/04-crew-embeddings.ipynb b/notebooks/clase-3/04-crew-embeddings.ipynb new file mode 100644 index 0000000..23dfe7c --- /dev/null +++ b/notebooks/clase-3/04-crew-embeddings.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting gensim\n", + " Downloading gensim-3.8.3-cp38-cp38-macosx_10_9_x86_64.whl (24.2 MB)\n", + "\u001b[K |████████████████████████████████| 24.2 MB 9.4 MB/s eta 0:00:01 |██████████████████████ | 16.7 MB 8.9 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: scipy>=0.18.1 in /Users/przivic/anaconda3/lib/python3.8/site-packages (from gensim) (1.5.0)\n", + "Requirement already satisfied: numpy>=1.11.3 in /Users/przivic/anaconda3/lib/python3.8/site-packages (from gensim) (1.18.5)\n", + "Requirement already satisfied: six>=1.5.0 in /Users/przivic/anaconda3/lib/python3.8/site-packages (from gensim) (1.11.0)\n", + "Collecting smart-open>=1.8.1\n", + " Downloading smart_open-3.0.0.tar.gz (113 kB)\n", + "\u001b[K |████████████████████████████████| 113 kB 6.2 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: requests in /Users/przivic/anaconda3/lib/python3.8/site-packages (from smart-open>=1.8.1->gensim) (2.19.1)\n", + "Requirement already satisfied: idna<2.8,>=2.5 in /Users/przivic/anaconda3/lib/python3.8/site-packages (from requests->smart-open>=1.8.1->gensim) (2.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/Users/przivic/anaconda3/lib/python3.8/site-packages (from requests->smart-open>=1.8.1->gensim) (2018.11.29)\n", + "Requirement already satisfied: urllib3<1.24,>=1.21.1 in /Users/przivic/anaconda3/lib/python3.8/site-packages (from requests->smart-open>=1.8.1->gensim) (1.23)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/przivic/anaconda3/lib/python3.8/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n", + "Building wheels for collected packages: smart-open\n", + " Building wheel for smart-open (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for smart-open: filename=smart_open-3.0.0-py3-none-any.whl size=107097 sha256=564a8f8126de510dc7cb44407b867cfd4ca62a11085c95255994d564825851d0\n", + " Stored in directory: /Users/przivic/Library/Caches/pip/wheels/11/73/9a/f91ac1f1816436b16423617c5be5db048697ff152a9c4346f2\n", + "Successfully built smart-open\n", + "Installing collected packages: smart-open, gensim\n", + "Successfully installed gensim-3.8.3 smart-open-3.0.0\n", + "\u001b[33mWARNING: You are using pip version 20.1; however, version 20.2.3 is available.\n", + "You should consider upgrading via the '/Users/przivic/anaconda3/bin/python -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install gensim" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "PATH = Path('../../data/')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('/Users/przivic/prog/machine_learning_practico')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from lib import data, transformers\n", + "from lib.model import 
get_features_pipe, get_model_pipe" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm.notebook import tqdm\n", + "import csv" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "from gensim.models.callbacks import CallbackAny2Vec\n", + "from itertools import groupby\n", + "\n", + "class IterSentences:\n", + " def __init__(self, data_path, some=False):\n", + " self.data_path = data_path\n", + " self.some = some\n", + " \n", + " def __iter__(self):\n", + " reader = csv.DictReader((self.data_path / 'title.principals.tsv').open(), delimiter='\\t')\n", + " for i, (tconst, rows) in enumerate(groupby(tqdm(reader), lambda x: x['tconst'])):\n", + " if self.some and i == 1000: break\n", + " yield [e['nconst'] for e in sorted(rows, key=lambda x: int(x['ordering']))]\n", + " \n", + "\n", + "class EpochSaver(CallbackAny2Vec):\n", + " def __init__(self, data_path, fname_prefix=''):\n", + " self.data_path = data_path\n", + " self.fname_prefix = fname_prefix\n", + " self.epoch = 1\n", + " \n", + " def on_epoch_end(self, model):\n", + " print(f'Finished epoch {self.epoch}. 
Saving...')\n", + " prefix = f'{self.fname_prefix}_' if self.fname_prefix else ''\n", + " \n", + " output_path = self.data_path / 'w2v' / f'{prefix}epoch_{self.epoch}'\n", + " output_path.parent.mkdir(parents=True, exist_ok=True)\n", + " \n", + " model.save(str(output_path))\n", + " self.epoch += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "from gensim.models import Word2Vec" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "model = Word2Vec(window=10, iter=10, callbacks=[EpochSaver(PATH)])" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1a5979e0d5b245a488b4f2156ee1f406", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "model.build_vocab(IterSentences(PATH))" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "40f21178d2444484bbccee8ca152c559", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 1. 
Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "48ac47938826485e988a3cc0fec1eb8c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 2. Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "74a26ca096874c208ee6a278f5ec21c7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 3. Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c89cb8874ed64234b40fe1e5d19740dd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 4. Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c4ab9691fffd4fc28fc042a92ad6498a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 5. 
Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "47eb4c316a524cc8a7ab0f2e6342cf22", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 6. Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3e46ff92ae674587ab24a31756eb50d8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 7. Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cfb1efa6f61241729d0d0a393d9f710a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 8. Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5f9e155b7978459181172bcc5c5281b4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 9. 
Saving...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ec3186bce1bb426ca00a63290ee41679", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Finished epoch 10. Saving...\n" + ] + }, + { + "data": { + "text/plain": [ + "(362470430, 410851810)" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.train(IterSentences(PATH), total_words=model.corpus_total_words, epochs=10)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/clase-3/05-crew-embeddings-check.ipynb b/notebooks/clase-3/05-crew-embeddings-check.ipynb new file mode 100644 index 0000000..8bf7010 --- /dev/null +++ b/notebooks/clase-3/05-crew-embeddings-check.ipynb @@ -0,0 +1,2218 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install beautifulsoup4" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from gensim.models import Word2Vec" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class EpochSaver: pass" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_HOME = '../../data/w2v'\n", + "DATA_HOME = '../../../machine_learning_practico/data/w2v/'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -p $DATA_HOME\n", + "!wget https://machine-learning-practico.s3.amazonaws.com/w2v/epoch_10 
-O $DATA_HOME/epoch_10\n", + "!wget https://machine-learning-practico.s3.amazonaws.com/w2v/epoch_10.trainables.syn1neg.npy -O $DATA_HOME/epoch_10.trainables.syn1neg.npy\n", + "!wget https://machine-learning-practico.s3.amazonaws.com/w2v/epoch_10.wv.vectors.npy -O $DATA_HOME/epoch_10.wv.vectors.npy" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "DATA_HOME = Path(DATA_HOME)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "model = Word2Vec.load(str(DATA_HOME / 'epoch_10'))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import HTML, display\n", + "from bs4 import BeautifulSoup\n", + "import requests\n", + "import requests_cache\n", + "\n", + "requests_cache.install_cache('imdb')\n", + "\n", + "def get_name(id):\n", + " response = requests.get(f'https://www.imdb.com/name/{id}/')\n", + " soup = BeautifulSoup(response.content)\n", + " return soup.select('.header .itemprop')[0].text\n", + "\n", + "def get_image(id):\n", + " response = requests.get(f'https://www.imdb.com/name/{id}/')\n", + " soup = BeautifulSoup(response.content)\n", + " candidates = soup.select('#name-poster')\n", + " return candidates[0].attrs['src'] if candidates else 'https://m.media-amazon.com/images/G/01/imdb/images/nopicture/medium/name-2135195744._CB466677935_.png'\n", + "\n", + "def render_person(id):\n", + " name = get_name(id)\n", + " picture = get_image(id)\n", + " return f\"\"\"\n", + "
\n", + "

{name}

\n", + "
{id}
\n", + " \n", + " \n", + " \n", + "
\n", + " \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "def show_similars(id, n=10):\n", + " display(HTML(render_person(id)))\n", + " renders = []\n", + " for similar_id, score in model.wv.most_similar(id, topn=n):\n", + " renders.append(render_person(similar_id))\n", + " \n", + " carousel = ''.join(\n", + " [\n", + " f'
{p}
' \n", + " for p in renders\n", + " ]\n", + " )\n", + " display(HTML(f'
{carousel}
'))" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Angelina Jolie

\n", + "
nm0001401
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Leonardo DiCaprio

\n", + "
nm0000138
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Harrison Ford

\n", + "
nm0000148
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Brad Pitt

\n", + "
nm0000093
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Tom Cruise

\n", + "
nm0000129
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Johnny Depp

\n", + "
nm0000136
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Christian Bale

\n", + "
nm0000288
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Uma Thurman

\n", + "
nm0000235
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Robert Downey Jr.

\n", + "
nm0000375
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Kevin Costner

\n", + "
nm0000126
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jennifer Connelly

\n", + "
nm0000124
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_similars('nm0001401')" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Ricardo Darín

\n", + "
nm0201857
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Federico Luppi

\n", + "
nm0527002
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Fabián Bielinsky

\n", + "
nm0081433
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Ernesto Sábato

\n", + "
nm0844531
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Cindy Teperman

\n", + "
nm4014688
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Pablo Trapero

\n", + "
nm0871086
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Chino Darín

\n", + "
nm3779182
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Andrés Duprat

\n", + "
nm3321995
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Miguel Cohan

\n", + "
nm0169135
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Eduardo Mignogna

\n", + "
nm0586002
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Alejo Flah

\n", + "
nm0994620
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_similars('nm0201857')" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Marcelo Tinelli

\n", + "
nm0863906
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Flavio Mendoza

\n", + "
nm4863059
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Angel De Brito

\n", + "
nm1376908
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Dalma Maradona

\n", + "
nm0989607
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Facundo Mazzei

\n", + "
nm7128020
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Fernando Bertona

\n", + "
nm7128063
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Anibal Pachano

\n", + "
nm4703260
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Marcelo Polino

\n", + "
nm1019916
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Ricardo Fort

\n", + "
nm4662591
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Juan Battaglia

\n", + "
nm5300999
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Soledad Bayona

\n", + "
nm7196893
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_similars('nm0863906')" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Jared Padalecki

\n", + "
nm0655585
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Eric Kripke

\n", + "
nm0471392
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jensen Ackles

\n", + "
nm0010075
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Todd Aronauer

\n", + "
nm0036896
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Davy Perez

\n", + "
nm3053945
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Misha Collins

\n", + "
nm0172557
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Robert Berens

\n", + "
nm2749064
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Meredith Glynn

\n", + "
nm3728867
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Annalyn Hostert

\n", + "
nm9514713
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Don Koch

\n", + "
nm0462287
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jenny Klein

\n", + "
nm3160583
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_similars('nm0655585')" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Robert Downey Jr.

\n", + "
nm0000375
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Gregg Temkin

\n", + "
nm1050612
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Samuel L. Jackson

\n", + "
nm0000168
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jerry Bruckheimer

\n", + "
nm0000988
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Susan Downey

\n", + "
nm1206265
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Thomas C. Grane

\n", + "
nm0994529
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Tobey Maguire

\n", + "
nm0001497
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Rob Marshall

\n", + "
nm0551128
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Shannon McIntosh

\n", + "
nm0570690
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Nicolas Cage

\n", + "
nm0000115
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Woody Harrelson

\n", + "
nm0000437
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_similars('nm0000375')" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Andrew Lincoln

\n", + "
nm0511088
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Lennie James

\n", + "
nm0416694
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Karen Gillan

\n", + "
nm2394794
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Iddo Goldberg

\n", + "
nm0325221
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Ruth Negga

\n", + "
nm1550948
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Marco Rea

\n", + "
nm1368985
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Matt Smith

\n", + "
nm1741002
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Lucy Griffiths

\n", + "
nm2189625
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Nathan Stewart-Jarrett

\n", + "
nm2805533
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Ronan Raftery

\n", + "
nm3995719
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Bonnie Engstrom

\n", + "
nm0257749
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_similars('nm0511088')" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Michael J. Fox

\n", + "
nm0000150
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Marc Lawrence

\n", + "
nm0492909
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Mary Tyler Moore

\n", + "
nm0001546
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Tina Yothers

\n", + "
nm0001869
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Candice Bergen

\n", + "
nm0000298
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Justine Bateman

\n", + "
nm0000868
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Garrison Keillor

\n", + "
nm0445087
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Harvey Fierstein

\n", + "
nm0001213
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Rick Reynolds

\n", + "
nm0721930
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Patch Adams

\n", + "
nm0011254
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Anne Beatts

\n", + "
nm0064186
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_similars('nm0000150')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Algunos random" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('/Users/przivic/prog/machine_learning_practico')\n", + "\n", + "from lib import data" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "PATH = Path('../../data/')" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/przivic/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3263: DtypeWarning: Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " if (await self.run_code(code, result, async_=asy)):\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "title_basics = data.load_title_basics(PATH)\n", + "principals_df = pd.read_csv(PATH / 'title.principals.tsv', sep='\\t')\n", + "ratings_df = pd.read_csv(PATH / 'title.ratings.tsv', sep='\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [], + "source": [ + "candidates = (\n", + " principals_df[\n", + " principals_df.category.isin(['actress', 'actor', 'director']) &\n", + " principals_df.tconst.isin(set(title_basics[title_basics.startYear >= 1980].tconst)) &\n", + " principals_df.tconst.isin(set(ratings_df[ratings_df.numVotes > 100000].tconst))\n", + " ].nconst.unique()\n", + ")\n", + "\n", + "candidates = [c for c in candidates if c in model.wv.vocab]\n", + "len(candidates)" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + 
"text/html": [ + "\n", + "
\n", + "

Evan Rachel Wood

\n", + "
nm0939697
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Jeffrey Wright

\n", + "
nm0942482
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Michael London

\n", + "
nm0518757
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Brit Marling

\n", + "
nm1779870
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Hubert Point-Du Jour

\n", + "
nm2428854
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Margaret Qualley

\n", + "
nm4960279
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Anthony Katagas

\n", + "
nm0441097
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Charles Randolph

\n", + "
nm1017488
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Matthew Michael Carnahan

\n", + "
nm1996352
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Emory Cohen

\n", + "
nm1710309
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Albert Berger

\n", + "
nm0074100
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Andrew Niccol

\n", + "
nm0629272
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Rooney Mara

\n", + "
nm1913734
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Anthony Katagas

\n", + "
nm0441097
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Matt Cook

\n", + "
nm3208946
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

John Lesher

\n", + "
nm0971956
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Brad Ingelsby

\n", + "
nm2145487
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Matthew Michael Carnahan

\n", + "
nm1996352
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Michael Benaroya

\n", + "
nm2918260
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Lucy Fisher

\n", + "
nm0279651
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Lynette Howell Taylor

\n", + "
nm1987578
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jennifer Todd

\n", + "
nm0865189
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Corbin Bernsen

\n", + "
nm0000929
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Alexandra Paul

\n", + "
nm0000575
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Julie McCullough

\n", + "
nm0567204
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Stephen Edwards

\n", + "
nm0250373
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

William McNamara

\n", + "
nm0001530
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Terry Plumeri

\n", + "
nm0006230
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Don E. FauntLeRoy

\n", + "
nm0005703
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Bernard Salzmann

\n", + "
nm0759574
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Nia Peeples

\n", + "
nm0001604
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

David O'Donnell

\n", + "
nm0640744
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Richard Grieco

\n", + "
nm0001298
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Paul Sanchez

\n", + "
nm0584052
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

David Murray

\n", + "
nm2733268
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Gregory Ouanhon

\n", + "
nm4130047
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Frédéric Thoraval

\n", + "
nm1754850
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Johann Benét

\n", + "
nm2829788
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Daniel Ouellette

\n", + "
nm0653496
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Andrew J. Smith

\n", + "
nm0807312
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Sanford R. Climan

\n", + "
nm0166787
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jonathan Keasey

\n", + "
nm4906351
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Dennis Cooper

\n", + "
nm1418756
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Katharina Schaar

\n", + "
nm10258715
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Liev Schreiber

\n", + "
nm0000630
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Brian Brunius

\n", + "
nm2455729
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Kathy Svitil

\n", + "
nm2476563
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Campbell Scott

\n", + "
nm0001714
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Lauren Ohayon

\n", + "
nm2472199
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Joel Olicker

\n", + "
nm2298749
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Margi Kerns

\n", + "
nm2460169
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Neil Leifer

\n", + "
nm0500145
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Robert Hanna

\n", + "
nm1223085
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Daniel McCabe

\n", + "
nm0564431
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Kevin Spacey

\n", + "
nm0000228
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Gus Van Sant

\n", + "
nm0001814
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Joaquin Phoenix

\n", + "
nm0001618
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Adam Kimmel

\n", + "
nm0453981
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Ted Hope

\n", + "
nm0394046
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Leslie Jones

\n", + "
nm0428655
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Paul Thomas Anderson

\n", + "
nm0000759
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Philip Seymour Hoffman

\n", + "
nm0000450
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Matt Dillon

\n", + "
nm0000369
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

John Cusack

\n", + "
nm0000131
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Mark Romanek

\n", + "
nm0738796
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Lisa Rinzler

\n", + "
nm0727789
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Steve Coogan

\n", + "
nm0176869
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Peter Baynham

\n", + "
nm0063165
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Edgar Wright

\n", + "
nm0942367
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Paul King

\n", + "
nm1653753
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Ben Taylor

\n", + "
nm4829613
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jesse Armstrong

\n", + "
nm1104036
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Sharon Horgan

\n", + "
nm1279721
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Armando Iannucci

\n", + "
nm0406334
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Tim Kirkby

\n", + "
nm1469191
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Nira Park

\n", + "
nm0661912
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Nick Frost

\n", + "
nm0296545
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Clark Duke

\n", + "
nm0241173
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Russ T. Alsobrook

\n", + "
nm0022540
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

John Clarence Stewart

\n", + "
nm6525603
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Erin Darke

\n", + "
nm3520615
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Frank Todaro

\n", + "
nm8972363
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Collette Wolfe

\n", + "
nm2180792
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Danny Leiner

\n", + "
nm0500444
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Daniel Schechter

\n", + "
nm1633080
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Jane Levy

\n", + "
nm3994408
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Brent Sexton

\n", + "
nm0786641
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Andrew Dickler

\n", + "
nm0225565
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Rajkumar Hirani

\n", + "
nm0386246
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Manisha Koirala

\n", + "
nm0463539
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Mahesh Anand

\n", + "
nm0025618
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Taapsee Pannu

\n", + "
nm3966456
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Sachin Khedekar

\n", + "
nm0451561
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Preeti Jhangiani

\n", + "
nm0422566
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Dilip Shukla

\n", + "
nm0795651
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Juhi Chawla

\n", + "
nm0004487
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Vishal Bhardwaj

\n", + "
nm0080235
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Ajay Rai

\n", + "
nm2490706
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Tabu

\n", + "
nm0007102
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "

Anushka Sharma

\n", + "
nm3087728
\n", + " \n", + " \n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Soha Ali Khan

\n", + "
nm1675786
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Vicky Kaushal

\n", + "
nm5817249
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Vasan Bala

\n", + "
nm3035236
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Vidya Balan

\n", + "
nm1799038
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Chitrangda Singh

\n", + "
nm1696711
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Laxman Utekar

\n", + "
nm2590164
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Pritam

\n", + "
nm1545345
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Vikas Bahl

\n", + "
nm2134474
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Bobby Deol

\n", + "
nm0219967
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

Sohail Sen

\n", + "
nm2992287
\n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from random import choice\n", + "\n", + "for _ in range(10):\n", + " id = choice(candidates)\n", + " show_similars(id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/practico-2/bias-variance-underfitting-overfitting.ipynb b/notebooks/practico-2/bias-variance-underfitting-overfitting.ipynb new file mode 100644 index 0000000..6c56297 --- /dev/null +++ b/notebooks/practico-2/bias-variance-underfitting-overfitting.ipynb @@ -0,0 +1,941 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Intuiciones respecto a la relacion entre bias / variance / underfitting y overfitting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generemos esto con un dataset sintético con una relacion cuadratica entre `x` e `y`" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def sample_data(size=100, x_matrix=False, min_x=-2.5, max_x=2.5):\n", + " x = (np.random.random(size=size) - 0.5 ) * (max_x - min_x)\n", + " x.sort() # util para graficar\n", + " y = x ** 2 + x + np.random.normal(0, 1, size=size)\n", + " if x_matrix:\n", + " x = x.reshape((-1,1))\n", + " return x, y" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + 
"execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "X, y = sample_data(size=100, x_matrix=True)\n", + "plt.scatter(X, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Underfitting\n", + "\n", + "Vamos a ajustar esta función cuadratica con una regresion lineal" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "\n", + "lr = LinearRegression().fit(X, y)\n", + "\n", + "m, M = X.min(), X.max()\n", + "\n", + "plt.plot([m, M], lr.predict([[m], [M]]), '--k')\n", + "plt.scatter(X, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Este es un caso típico de underfitting, la capacidad del modelo no es suficiente para explicar los datos" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Como se comporta con una parte de la distribución que nunca vio?\n", + "\n", + "Vamos a evaluar la regresion lineal en un intervalo de datos que nunca vio, que involucran al `[-5, 5]` en lugar de `[-2.5, 2.5]` con lo que fue entrenado" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "oos_X, oos_y = sample_data(min_x=-5, max_x=5, x_matrix=True)\n", + "\n", + "m, M = oos_X.min(), oos_X.max()\n", + "plt.plot([m, M], lr.predict([[m], [M]]), '--k')\n", + "\n", + "plt.scatter(oos_X, oos_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "La prediccion esta determinada por el sesgo que introdujimos nosotros. Este sesgo viene al determinar que el espacio de modelos que vamos a explorar, corresponde solo al de las funciones lineales en x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fiteamos muchos modelos lineales\n", + "\n", + "Veamos como varian las predicciones si entrenamos muchos modelos lineales.\n", + "\n", + "Podemos ver que el error es sistemáticamente alto" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "def sample_predictions(model, x, iters=100):\n", + " \"\"\"\n", + " Toma `iters` muestras de datos, entrena un modelo con esos datos \n", + " Devuelve las predicciones de los modelos\n", + " \"\"\"\n", + " ys_hat = []\n", + " for _ in range(iters):\n", + " X_train, y_train = sample_data(100, x_matrix=True)\n", + " model.fit(X_train, y_train)\n", + " ys_hat.append(model.predict(x))\n", + " return np.asarray(ys_hat)" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [], + "source": [ + "def expected_prediction(model, x, iters=100):\n", + " return sample_predictions(model, x, iters=iters).mean(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXIAAAD5CAYAAAA6JL6mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOy9eZhkV3nm+Tv3xr7lXpWVWatKUklChRAUi5EbkMAgbAEyNjbuxcy459Ezpr3A0IyF7cfgeZhBbXW3wW1Pd6ttPPaYMdCNkDCLWSywjW2WEkJIgIRKUqmqMqtyj32Pe+aPyO/kzVCuEZFLZJ4f5FNVmZERN7JKb3zxnvf7PqW1xmKxWCy9i7PTF2CxWCyWzrBCbrFYLD2OFXKLxWLpcayQWywWS49jhdxisVh6HCvkFovF0uMENnpDpdRHgTuAaa31jYufGwQ+ARwHzgM/p7VeWO++hoeH9fHjx9u4XIvFYtm/PPzww7Na65HWz6uN5siVUq8C8sCf+4T894B5rfU9Sqm7gQGt9W+sd19nzpzRZ8+e3dQTsFgslv2OUuphrfWZ1s9v2FrRWv8dMN/y6bcAf7b4+z8D7mz7Ci0Wi8XSFp165Ae11pcBFn890PklWSwWi2UzbNthp1LqLqXUWaXU2ZmZme16WIvFYtnzdCrkU0qpQwCLv06vdkOt9X1a6zNa6zMjI8/z6i0Wi8XSJp0K+WeAdyz+/h3Agx3en8VisVg2yWbih38JvAYYVkpdAt4P3AN8Uin1r4ELwNu24iItFoulF3ngkQnu/eKTTKZLjPVHee8bTnHnzeNdf5wNC7nW+hdW+dJru3QtFovFsmd44JEJ3nf/Y5RqDQAm0iXed/9jAF0Xc9vZabFYLFvAvV980oi4UKo1uPeLT3b9sTZckVssFotldVptlIl0acXbTa7y+U6wQm6xWCxtIuI9kS6hAOmTb/2zn7H+aNevwwq5xWKxtEGrB94q2hqeJ+bRoMt733Cq69diPXKLxWJpg5U88FY0MN4fRS3++qG3nt7Z1IrFYrFYltiI1z3eH+Uf7r5ty6/FVuQWi8XSBut53Vtlo6yEFXKLxWJpg/e+4RTRoLvsc2rx1620UVbCWisWi8XSBiLS29G5uR5WyC0Wi6VN7rx5fEeEuxVrrVgsFkuPY4XcYrFYehwr5BaLxdLjWCG3WCyWHscedlosFss2sJWzya2QWywWyxaz1bPJrbVisVgsW8xWzya3Qm6xWCxbzGpzWbo1m9wKucVisWwxq81l6dZscivkFovFsgoPPDLBLfc8xIm7P8ct9zzEA49MtHU/K81l6eZQLXvYabFYLCvQzQPKrZ7LYoXcYrFYVmCtA8p2BHgr57JYa8VisVhWYKsPKLuJFXKLxWJZga0+oOwmXRFypdS7lVLfV0o9rpT6S6VUpBv3a7FYLDvFVh9QdpOOhVwpNQ78GnBGa30j4AJv7/R+LRaLZSe58+ZxPvTW09uyPLlTunXYGQCiSqkaEAMmu3S/FovFsmPslsUR69FxRa61ngD+PXABuAxktNZf6vR+LRaLxbIxumGtDABvAU4AY0BcKfUvV7jdXUqps0qpszMzM50+rMVisVgW6cZh5+uAZ7XWM1rrGnA/8MrWG2mt79Nan9FanxkZGenCw1osFosFuiPkF4BXKKViSikFvBb4YRfu12KxWCwboBse+TeB/wF8B3hs8T7v6/R+LRaLxbIxupJa0Vq/H3h/N+7LYrFYLJvDdnZaLBZLj2OF3GKxWHocK+QWi8XS41ght1gslh7HCrnFYrH0OFbILRaLpcexQm6xWCw9jhVyi8Vi6XHszk6LxWLpkAcemdiyxcobwQq5xWKxdMADj0zwvvsfM4uaJ9Il3nf/YwDbJubWWrFYLJYOuPeLTxoRF0q1Bvd+8cltuwYr5BaLxdIBk+nSpj6/FVhrxWKxWNZhLQ98rD/KxAqiPdYf3bbrsxW5xWKxrIF44BP
pEpolD/yBRyYAeO8bThENusu+Jxp0ee8bTm3bNVoht1gsljVYzwO/8+ZxPvTW04z3R1HAeH+UD731tE2tWCwWy25hIx74nTePb6twt2IrcovFYlmD1bzu7fTA18MKucVi2TM88MgEt9zzECfu/hy33POQ8bE7YTd44OthrRWLxbIn2KrGHPnenezcXA+ltd72Bz1z5ow+e/bstj+uxWLZu9xyz0MrxgCheQC528S3HZRSD2utz7R+3lorFotlT7BWA05rZHCvYYXcYrHsCdY7fNzutvntxAq5xWLZE6x0KNnKdrbNbyf2sNNisewJ/IeSq3nluyky2E1sRW6xWPYMd948zj/cfRsf/vkX7frIYDfpSkWulOoH/hi4EdDAL2mt/6kb922xWPY37Sxt6IXIYDfplrXyEeCvtdY/q5QKAbEu3a/FYtnHdJIN3+m2+e2kY2tFKZUCXgX8CYDWuqq1Tnd6vxaLxbIbljb0At3wyK8CZoA/VUo9opT6Y6VUvPVGSqm7lFJnlVJnZ2ZmuvCwFotlr7Mbljb0At0Q8gDwYuA/a61vBgrA3a030lrfp7U+o7U+MzIy0oWHtVgse51eGFi1G+iGkF8CLmmtv7n45/9BU9gtFoulI3phYNVuoGMh11pfAS4qpeQn+1rgB53er8ViseyGpQ29QLdSK78KfGwxsfIM8D936X4tFss+ZzvTJ+1EHXcDXRFyrfV3gedN5LJYLJbtoBsCvFVjcAG01pTLZcrlMqlUCtdde5TAZrEt+haLpadpV4Bbxb9Yra8adWxHyLXWVCoVyuUypVIJrTWO41Cv162QWywWi5+1suarCfBK4r8am4k6aq2pVquUSiXK5TKe56GUIhqNEo1GCYfDG76vzWCF3GKx9DQbzZr7K3BHKRobXKqzXtRRa02tVjPi3Wg0UEoRiUSWiXetVqNYLBKJRHCc7o65skJusVh6mrH+6IoVtV+AWyvwjYr4alFHEW/xvev1uhFv+QCMwDcazcd1HIet2MpmhdxisfQ0733DqWUiDc8X4JXsl5XojwaJhwMrHppqranX68bzFvEOh8MkEgkikQhKKer1+vPEOxQKAVCpVLr51A1WyC0WS0+zkUmHG/G5o0GXD7z5Bcu+TyrvSqVixBsgFAoRj8eNTSIC7xfvYDAIQLVapVgsmu8Nh8P2sNNisVhaWS9rvpr94iqFp/Uy8ZfKWw4t6/U6WmtCoRCpVMoIsdxGBNpxHAKBAEop44fL17TWeJ5HqVRicHCw68/fCrnFYtkSdlNzzWr2i3SJingXi0VKpRK1Wg2tNcFgkEQiYcS70WgY3xuWxBswlopYLo1GY9nn8vk8AAcOHOh6esUKucVi6Tpb2VzTDivZL//29ddyx+mDFItFyuUytVoNz/MIBoPE43HC4TCBQADP88zBJoBSikAggNaaRqNhvlfEu9FoUCwWTVXueR6BQIC+vr4tiyBaIbdYLF2nnWz3VnPnzeO85UVjpqoul8vMz8/TaDQIBoNEo1EikQiu6y7zxqEp3p977Aof/sqPmFwoMZpweeerT3D7jaN4nofneeTz+edV66lUilAoZKyYdDrNgQMHuv7crJBbLJaus5Fs93ZZL1I5izDXajUajQaBQIBwOEwkEjEVtvjeWmuUUjiOg1KKBx+5xO/c/11K1ablMjHv8bsPfpd89gSvunqAcrmM1ppAIEAikTBeealUIpfLkc/nyeVyFItFDhw4QDTa3TG8VsgtFkvXWS/bvdXWi9aa+89e4N6//gGTc1lGkyF++VXHueNFhwmFQsY2gSVvW8RbPqRLs1ar8R8+/z2K5QqNRhVVr6IbNQq1Gv/lywVecfTlxONxHMdZ1hhUqVTI5XKmw9P/0W2skFsslrZZrapeL9u9FdaLv/K+/9vn+cCD36NUraGUw+VchXv+5gKp/kHe/KJ+422LeAN89tFJPvI355iczzGaDPHO11zF7TeOUqlUmJyaoVGrob0aSoNWoAJBZir
NLs5MJkO9XqdSqVAoFKhUKqbxR5IrruuSSqXMC0g3sUJusVjaYiNV9WrWSbdWuPnFu1arUa/XqdfrfORLP6Tc0DihKMoJoIFy3ePezz/O668bNOItvz748AU+8JnHKVaqKAUXZov87n//FnMzx3jV1YMMBOrMlCoQCOB5DZSnadQKHEiEmZ2dpVgsUq1WAfA8b1mnp0QWY7EY0WiUZDK5uR/0BrBCbrFY2mK9qnqtbPda1st63rlfvEW4peqVxMl0xUEFIyit0Y0aaI0HXM4sXa/EAz3P48Nf/AH5Qh5Vr1Cv1/DqNQq6wUf/tsIrj5/hX7zsCP/5q09SqhTRaHSlRljVeePJAdLpNI1GwyReIpEIsViMYDBILBYjFAqhtSaXyzE7O8uRI0eIxWJd+BtYwgq5xWJpi06q6tWsl1uvG1mxytda86YXjpqDSvkVIBAIEI1GCQaDOI6D53mMJoJMpot4MtdEKRQwmgyZ75UGnVKpxMXJSerVKg4NtFJ49SooxfR8syvz5ceSFF56iE/849PM50v0x13e9MJD3HwkgeM4RrDFew+Hw9RqNRYWFozVki9VuFyP8w9fPscbXtTg1uu6l16xQm6xWNpiI8OqVmM168Vf5WutQXsUihX+3V89yq1XJWg0GiYdImkTGUQlTTxaa3711pP87md/QKnarNTxGoQCDr/840fI5XIUCgWT9a5Wq/SpCrP1Mg2tUUqjPdDAcDxgKu4bR4Lc8KZrcRzHpF3kxSMUCuE4DuVymXQ63TzsrNa4XFKcL0d5ppjimaxDXQPMUlVhK+QWi2Xn2ciwqrVYyXp518cfQWsP3aiD9tBeAzyPiflmnjsUChnxhuZBogg4YA4v3/SiMTQef/DlJ7i8UGQk5vCLZ4a5PlXl4sWLVKtVU9lrrXnbSw7x0a8/Q7kONBqoeo2g4/HGU+OUSiXT4RkIBAgGgyilTKdnqVRidnaWarXKfFlzvhzl2VKMczmXfG3l5/71c7PLDlo7xQq5xbIH2Il2+I0Mq9rotd5x+iC1Wo3RmGJioQy6Aajm/90AYwNxotGo8cdl/okIuD8yKN75jx+NcfpnT5qZJ+VymdnZWbOhx3Vdk/d++bE+yqVRPv2t8yyUKwwkwvz0S05w641HCAQCRrRd16VUKlGtVimXyxSrDZ4tBjlfjvB0Ps5UaWPCPJ2r8KOpPKdGu3PwaYXcYulxNpvJ7qbob3Yxsv9atdfg4kya3/j4tyjmr+eNNx7kV159nP/jsz+g3AiAcsBRRAIu73rdNVSr1WUZbGnYEfFuNBrk83my2SyFQoFqtUqlUqFSqZg2edd1iUQi5sVA/GvP8zhzJMUtV7+UaDRKIBAw9ylNRLVajWK5wuXyonAX+jifV3htjhf/+6dmrJBbLJYmm8lk7/QMlN/7/A8oFIs0TWiN1h7FuuYjX3mSN71onJ9+6XGC4TC//+WnuJwucigZ4ldvPcntN4zQaDRMpyVgctu5XI5sNmvmhJfLZSP64qMrpUwsMJ/Pm7kpoVCI/v5+czgp4l8oFIyQz5Sadsm5fIyn80lK9faff9xt8GMnBrn95uO86trhbvxIASvkFkvPs5n0yE7MQPFHBCdm02jtoWgeJqrFqvtKvm4GVP3kjQe5/YYRU327rmvEWzon8/k8mUzGDLySwVVaa2KxGJFIxHy/VOWlUgnHcYjH44yMjBAIBEy1Ld9fr9cp1OB8KcTThQhP5wPMVdr3sQNKczhU4likxLUpj+P9IV796hdz+PDhTn+syx+nq/dmsVi2nc2kR7rViLMaYttMzOc5lAzxa7ddxR0vPGT87NG+MJczi1tylGp+AIdS4WXdkH7/WoQ6m82auSVSfUuVLgkSEe9yuWyadFzXJZFIMDg4iOd5lMtlcrmcmWpYqdWZrEZ4rhzlqZzLxYJC0754HwhWOBYucnWywbUDLoN9SVy3GY/UWnd9zgpYIbdYep7NpEc6iQyux6e+/Ry/ef+jlCpV0JpL8xX
e/+BjeJ7Hm180jlKKd7/+et7/4OMUa41ms45XJxJw+ZXXnDBJEBHvYrFo/O5sNkuxWDRiHwwGCYVCplL3PI9CoWAmD0rKJBQKmXcD6XR6sepuMFsLcr4U5ulCjKdzDlWvfeFOujWOh8ucTNS4bkAxkooSiQysmEgRMe82XRNypZQLnAUmtNZ3dOt+t4LdNPDeYumUzaRHOo0MtuK3Te79/GMUy5Ul2wQo1zz+4KGnecvNh1FK8VOnR6nXavzBV37ElWyJsf44/+zUCB/5m6d53ycfYTim+MWXHODlR+NkMhlji8DS7ktJqMgGH2mNj0QiDAwM4LounudRrVYpFAp4nsdCqcGFaoxnCgmeLgTIVNsX7qDyOBoucVW8ynV9cGI4TiAQM/69HMK6rmu6OkXAJTHTbbpZkf868EMg1cX77Do7fdhjsWwFG02PtBsZ9OMXb38WejJd8hkSzd9ppbmSLpqlDc2M9zh33DTWnFD47Wf44GcepZQvUi/nuXClyD1PP86/OHOYl58cNrsvA4GAyWwXi0UzhjYSiTA4OIjWGtd1qdVqzcq8Wud8McBzpSjnCkEul5zN/DiXodAcClW4KlblVL/mqj6HaDhEIBAxzUnBYJBgMEg4HDYJmUqlYmauVCoVQqGQyaB3m64IuVLqMPBTwP8J/G/duM+tYjcOvLdYtpPNRgZhZfH2C5LWmkN9US5nymgWDzN1AzzNwVTTtnEcx8T5yuUymUyG//jJr5GdW6BRqwIeDg4VpXnwsSn+2fVjS3ntYhFopkxWWgBRrlSZLCrO5YM8XYhzoRigrtsXzIFAjePRCtcm65waUMRDLsFgGKWUEWz5AIz3XigUTN48GAyahRJaazKZjLldt+lWRf5h4H8HVg1FKqXuAu4COHr0aJcedvNs9WGPxbJXWEm8Jbcty4T9/Pprr+YDDz5GsVpHL3or0aDLO19zwhw8zs3NkU6nzZzuySuXcRbNGOW6NBqAV2NuLs3k5KSpdP2bdmRhw1zJ41w+yLmcy9OFFMVG+1V3xGlwIlrlZKLG1Yk6IzFnUYgjRCIRwuEw8fhSU5JYPrJhSN45SMepePbyzsG/uDkej7f/l7IKHQu5UuoOYFpr/bBS6jWr3U5rfR9wH8CZM2e67/ZvkK087LFYep3VxBuWpg76uyk9zzMDqF577QC126/hD7/6NFcyRQ7GQ/zrW45wQ7LCo48+SqFQ4B+fmuIzj0wyX6wymIgQDQcolBvoWgWnUW2mRRyHwVScwcFBI96e51GsefxopsaTWcXT+Siz1fbly0FzJFrlqliVq+JVjsQhGGh62rFYikgkQjKZJJFImN2b5XKZubk5arWa8cBl7kqj0TBevXytv7/fXL//5yfDvrpJNyryW4A3K6V+EogAKaXUX2it/2UX7tvQrQPKbh/2WCy9zkri3So+/qSFfO7Bh5/jj756jiuZMof6orzz1pO88fQhbjmeYHp6moWFBfL5i5w71/z+hy+m+YtvXKRSq6F1nen5AoGGB6qBckJ4wTBuMEQwGOZtrzxOMBzh6XSdJzPwVNblUim0aNq0R6OUo5adJVLNcPdrx4kG3cWxtwMkk0mzHLlabU48TKfTXLp0yXj7ItqJRMK8G5EXMhmfK8ItUxir1aoZsSv3sys9cq31+4D3ASxW5P92K0R8rQPKzYh8Nw57LJZeZzXxlnklrbNMRLxl2NQXH7/CPX/9FKV6g4b2uHB5ht/+86d58uYRXjQWM+kM/xb6T/3TU5RzeRwNWjmoUJBGMEYyGiUeDTKdLzOYSnL6xBiPV/p48GyAihdq+zl6tQr17CzV7DTV3ByO54HrUgvFOHXyBIODg0SjUSqVCtlslsnJSXOQCpiIYzAYBJY8/lqtRjgcxnVdwuGwya9LQ5FEID3PM6kVx3FwHGdLtgNBj+TIVzugfM8nH+Xsc/N86uGJTaVQ2jnssVh6nZXE2y8s8nkRMklbyCwS8YGDwSD/91fPkcksoIsLNMp
FvHqFivb4xD+luemtLzQecbFYNN87n6tAIIoOBAiEwjSAgBOgGkpxzYmjVItBGsEI360Alc0/v4DSHIlUOBoscDJR46MPPU6p1lwwER4YRYUS4DgMR5svWufPnzfC7a+4k8mkeUfiX1ghXrfruuYFTbpMJTUjL17yc5JpjeFw2LxYygFpN+mqkGutvwZ8rZv3CasfRDa05mPfuECr4W5TKBZLk9XE29/y7hdvqbplVkkgEDBjY+v1OtPT00xPT3P+ye/h1Zqetm7UaaBxvAZz83kuXbq0LN0hLfMHDswxW6wRjPYRiPcRSAziRpMopThfA4KbfXaag6Eax8JFjoaKHI83SMSaWfLBwUHiI0f5yEPPUK6W0LUyjfRlgtR5/ZlxLly4YARWxNZ/gKuUIhQKEY/HTdJEZrAARrTl8FLuRxZLyLsb8cv9P0ep8LtJT1Tkqx1QAs8TcWG121sse53VxFsOLSXTLV+Xmd6VSsVUjZLOKJfLTE5OMjMzQzabNd/X58JUoYryqqhGDVWvoR2H/mScZDJpVp2B4koJnpoKMHzyJuq1MMppvyEm6dY5GipyJJjnZKLOcLKZIx8YOEIqlcJ1XTNI61Syws+d1Hz64SkWitXmaNoXX8VrXjBm7BDJgYvgSpxRYo9ii8gKt3A4TDQaNaLtT6rIz048cvlVEPtFbJlu0hNCvtIB5Xoomt66rcot+4HVxFvEScTav6hYPF3xbhOJ5gaeQqHA5cuXmZ2dNTNJqtXqMpG75YjLZx/JUtUa5UbR4TChSIifecVJiCR5PB/iqVyAp/MBsrUlMVObTAjqRp0RleMFqSon4zXGkgH6+/sYGrraLHqQVMmlS5col8vmZwDw8pNDvOqGMdNZKVWydGHKBEW/ndRMrjTfRcTjcSPg/mYe+VXEWj7MdfvOFMxz9x0id5ueEHIR4/d88lEaG5xToMHaK5Y9zVriLV+XqX4y5U+igtLuLod9pVKJ5557bjFpkjczTfwvAIVCAaUU4XCYH7v2EPFYnM9/f4ZMyWNg9CBXHzvCNxpJHniifbHS2qORX6C2cIVafo5GqUQpFuWON9zET9x8vYn65fN5ZmdnjXCLRSSNOP5ctyRIpMtSfmYybCsajRKPx0kkEqbZyP8OBpYWV/gF3I//HY7cTlr0/dMbgWX32y16QshhScxXig6uVqnbJh/LXmM18ZZDSxHvSqViZmuL9yuermy5mZubY35+3oi3dBzKZnkRSRHFeDy+aBkEmG1EaAyPcM1Lr+e5gktdK87VgFVWm61F1CuTmZmgsjBNvbAAoTBuOImO9RFMjYBy+MQjU/z4tQeYmZkx1yWNOHJIKT8HqYZrtebFVCoV864jHm/m0+PxOLFYzCxtlp+ff+SAX3wlzSO/l/sGnueDy334Ez/+5qldPTRrO1hrYWs3mnzsMC3LbmQt8RZRKRQKZqGC2CCwlLbQWlMqlZifnzfiLcsTRGjq9br5Xn/V7rouRcJMNvp4Jhfi2UKQQgddlGFVZzyQ51i4xPFImeMjSZ6cT/LAD6pMJwdwcNC6jler0Cgs4FVrXKbO+fNJ845DRFksErlmf4IkGo2STCZJJpNEo1EzDdFfIfunJ/rxRy/lz35bxu+Ny/34D43lc/ICIt+3r60VP6tFBztt8rHDtCy7ibXE23Ecc6An1bfcTsQXmuKUzWbJ5/MsLCyYRQzi28q4WMAcdErTiueGuUwflyoJni2GOuyi9DjoFjkSLHBVvMKpkRiDA/2kUmMkk0kcx+HaSoXbThf45T/9B6bTWRyvgVevgePgKIfBZJRUKmUqZ3kBkxccSY+kUilz2BqLxZbl4wUR/ZVGDcjt/MLbemjpvw//98kLjN833y56TshXIxxwjAgPxIK8/00v2JQAd3OYlq3sLe2wnnjL5L9SqWQOK/1eLDQFeXp6mlwuRzqdJp/Pm8gc8Dzfu1wuNz1dxyXt9nFZ93GhEuNSKdhRF2W/KnE0VOBEtMyNByMM96f
o7z9APB4nGAyaRRHnz583z6dSqXDrsTCfWqjR0C4qGIVAgHAoyM++bNxUvPJ8+/v76evrMx2ZMqTKvw5OOi9FtFsnNvrTJRsVbX+V3fp4O0XPC3lrJQ3NGcibpVvDtGxlb9kMa4m3DGfK5XLLDitlvnUwGKTRaJjlC7L+zG+ZSJVdrVaXHXp6nqYaSnFFjTPRSHKhGKHitV9BRqlxOJjnqliVFwy7jA8mGBw8SjQaxXVdsxxiamrKrFZr3Wbf39/P7QcP0jcwxKe/M8F8scRQPMLPnjnGbacPk0gk6OvrM8Or5HvFwpA5JlKpy88AlmwOsUTkMeVrwPMq9O20Rjql54W8W5V0t4Zp2TG5lvVYL+ctLeMyp0PSFrBkh8zNzRnxlkrdL0CSMhHxrlar1J0QM84gU2qQS404mXz7jSkuDcYCRU5EK9wwpLhmJEEqddDsyiwWiywsLHD58mXTBSkHp/6IXzgcfp7/fNuNh/nJM1fT399vDiX9SRLXdU36Ru5XopTm+hYPdlutDv9BpP8wsrUy325rpFN6Xsi7VUl3a5iWHZNrWYm1BlNJK7wcVPrFWwSqWCyaVWalUsm0zvsnELZukS9Va2QCg0w741yhn+laBNq1S7TGK6apZmeJ1gu86YZBXvvCw0QiQ0ZI5+fnTUeoX1TlHUQstrRFRwRWGmui0SixWIxEImGEWypu/9RASeLIOxPpPpUopb/SXn75yyvt7bBG5DHFj5ff79uGoLXoViXdrWFadkyuRZC3+f4GFf9YWLE7/OInQ5dkTZmItjSuyO0kHijCXq1WKZXLpL0o084gM+4QU06KuufA5p1GABKqyoloGbeS4euPP0fd83CcAGWt+f/OFggE4IWH4ss6JFu7JWXuiBxIhsNhIpGIqcj9FbfYJP4XJhFtEW4Rf7mPVkH0V9r+FvmVOi27wUpC7T9I9eP/u+82PS/k3RxL241hWnZM7v6mVbwlBQJLlodkoEWcxRqRCKEcZMptACPm/qq8VquRLntMqQFm3UNMqQFK7uK0wDbEO0id8WCRa5J1rk15jKdCBAIuv3n/BLWGh9IeXqMMGioNj09961luuvO0mcMCSxMDpdKWGSTRaNSMgJWmG9pFIuEAACAASURBVLE6/PNd5EVNFjIEg0HTEu+fUeK3RWApZdJta2S1qlr+3Ir/Ovx20VbbND0v5LttLO1uux7L1uMXb39Kwj+/Q/7jF9HO5/PkcrllCZRAIGCEzF9xiw9eKpUo1z2u1OPMBA4yrQbJBBNtX7dCM+IUOB4pc22qwYk+h3AwgOO4eJ5Caw/PU8zmS4Ba/B80dA1dh3TRMV60CHUkEjFzSaLRqPmQRImIdi6XMzaSNCvJDk4ZtgXPP4iE7qdG2qmq5fFbW/53ip4Xcth9Y2l32/VYuo9/ZomIkbz1b50uWCwWyWazyzoopStRWsblgFD2UxYKBQqFApVqlXkvxkQjyYx7jHlnAC/UfnWXpJnnvjblcU0fJMLiQy+vZOX6q9UqgzGH+VwVrRTadXGCSZxggAP9KU6ePGmq70gkYiwP8cDFPsrn8+adiDyO2Cv+6YN+umWN+HPjrULd2ggEq1fVW7UUohvsCSG3WLYDEW+xOVoXEPiXKKTTadNFWSgUTGLDLwjmULJUIpfLkc/nm6Jfd7ms+5niCHPBEapuCNo8GwvpKmOBPFdFK1w/6DAUYfEa3GWzQIBlc7YlFRIKhfi5l5/iz751gboTwwmEUG6QaDTMu+44zZEjR0y1Le9GisWisYZEfL/yxAz/9esXmM7VGB2I866fOMWbbho21ymHv+1aI2sJ9VovEP5o4U5X1Z1ghdxiWQPZ+r6Uv15asBAOh81Apmq1ypUrV8z8kmKxuGwzTDgcNpZCrVYjl8uRzWYpl8vkyjUuN5JMq2Fm3GEKHdgljvYYcXIcCzW97hP9QUAiji7xeNxUy/4DVvG4W7sko9EoN90U5tg11/Hfvv4c0/kqo/1R/s2t13L7jaN
mgJUsZxBBljG2ruvy+cenuPerlyjXNLgBruRq/O7nniQYCvHWlxzdkID6M94rWSHP+zk4Sxt5WoW6V8V6LfadkNuuS8t6yOAp/0hUqV7FNpHZJdPT08zNzbGwsGBSJTLfRIRSNuWI372QyTJVi3CFfuaCR8m4A+hA+3ZJNTePk5/mhYN1Xnc8TiwcWLyGqMlqy/xtmadSKpXMi1IkEmFkZOR50T+pXJVSvPklKe64+aip1uUdhWTCpYnJ71nL9//h356nol1UKIhanGNb0fDhh57lbS87YZ5HO1V1q1hvZTJkN7OvhNx2XVpWQxYJiHhL1rg1FpfNZpmZmWF6epp0Om2y3DLvWhYbFItFisUimUyGbC7HbFlxhQFm3VEWQqepB9pvxgnrCvHKLBefu0ApPY3rAG6Af5wOEQ4c5a7bTi7rDK1UKuTzeaBZqcZiMYaGhujr6zMedeukv9ZhUmINyeGmvCtpnce9kjVyJd9AuYHFqtoDrUF7XJpdekGxVXVn7Csht12XFkHsEMlpi5C4rmu6COUgcm5ujrm5Oaampshms8b/FSEDTLUtaZT5QpWJWpxpd4iF0HWUY7G2r9XVDUbIMOZmORoqcDjh8p++doFKQ+GmRnDdINoN4jiav386zbUHL3HTWNyIYDQaZWRkhFQqZV6YYHmu2d9dKe84WmeQ+NvpW/1sf7pEvHJpkT8YU0ymi00B96MUp37784z1x3jP609x54uP7OuquhP2lZDbrsv9i+SVK5WKOWRsFW+pvCuVCul0mqmpKWZmZshkMpTLZSNcIjSFQoFMJtM8pKxUuVQKcVn3MR+6jmyoH8Ltd1H26yxjTpYjwQJHojUG+pLE43EGBg7T19dH/ttVgl4d3ahCrQLVAh4uuIoHH5vmTa94tYkE+meKiECulHEWgfePihWbyP/c/faHJG7Wqqrf/bpr+Z2/+gHlure0Ikgtif+VgsfvfPZHBENhW1C1yb4Sctt1ub+QhhqxTaSSFhGTzLI05mQyGSYnJ5mdnTWzTvx2gfjdzYRJgZlqgIlGkmnnKAvBYbxo+/85Rb0SoyxwJFjg6mSDoWSERKI5vySZbM7hlvkp8/PzDDgl5so1cAM4kWaaxHEDqECIjHLp7+9fNlQKeJ4g+w8nZQhX65xtEW1J66zkVa+VrQb4+VdeTTgaXXPDl31n3Bn7Ssjb7bq0B6S9gX+IknQJlkolIz6hUIhEopkIkbTF/Py82QyfzWZNhSkVer1eNwmTdNnjYi3OtDPErHuKajDaxub3JgGvxrBe4GiwwDWpBocSAVKpJInEkOlidBzHjHuVxiLZSv8/3fZC/tPXnkO7DsoNoHDAdVA4HExFTOekWERSbYu3LVV3a5Xt3zEprDQF0G/DbIQ7bx7n3Z/47pq3se+M20dtxdqh9Thz5ow+e/bstj8ubF6UVxqTq2juBB23or6jiF0imW452JMtNxKlC4VCaK0pFouk02nS6TRXrlwxDTqS3pAK0vO85tjVQpnJeoxJL8W0GiIfSLV9rUp7DHhNn/v6Abh6IEgs2lzuK4kYebfgnyvin1ci4izvJD7y1XP81XevoFmcLeK4RENBfvOOG7n9xtFlOyz9jUprVdUrNcJ0i1vueWjFd8SCqxSe1rZYWgOl1MNa6zPP+/x+E/LNst4/vmjQ5UNvPd3Vf3T2HcDq+IVbBk7Jh9gG0uRSLpdJp9Omq3J+ft5s1ZGdiyJWzaRJicuVABP1JNPOIAvuAJ5qf0pdrJFn3M1xTbLO9UMBhvviRpQBE1WEpapXqm6plsXnlj/7540opfjS9y/z3/7uWabyNQ6mIrzztmt5081Hlj2O//5X61jcDlYqilZjK/672gtsmZArpY4Afw6M0hzVc5/W+iNrfc9uFPLVxPPE3Z9jvZ/QeH+Uf7j7tq5dx0r2z379Ry0Hav6WeP+MDv88E2mHz2QyZLNZU4FLjlsqXX+0brbkcbEaZ9JLMeMMUXNCbV9r0KsyqtKciFY
4PRLk8GDMxPUAM09FBNvvT/tnlviXJPtnZ8uHP7MtNox/k9B2D2zaDPLf2US6hKsUDa3Nr61087+rvcJqQt4Nj7wOvEdr/R2lVBJ4WCn1Za31D7pw39vCWvny1Q5I/az39c2w3yOSfrvE3wrvH0jlOI4RdP/kQP9kQMlxa62pVqtG2Io1j4uVKJNeisu6n4ITB4fmxyZRusGwznI0XOSGQYdrR6KEQ3HC4UEzS1weHzCNOa0CLO8g/BvYZca3f0FCJBIx99G6MKFXctUrzSE6cffnVryt9cw3TsdCrrW+DFxe/H1OKfVDYBzoGSFfSzxXOiBtRdF8MeiG0O7HiGTrAaV/+4t/oYBMCpTsd7FYNEkK2U0pY2FlfGzD08yR5LlKlMteH/MqhVZO8y+tDe1LeTkOB/OcSmlODbkko2Hi8QM4jmPeNRSLRdMkJJP//JE+v4hLo5F44zJ0SgZQSZJkK2ZpbzUbtQhtmqxzuppaUUodB24GvrnC1+4C7gI4evRoNx+2Y9YST/9Y2tUqb7349W4I+X74Ry1xNv/uRvGsZUGubLqRX7XWlMvlZUsHZKpeOp32zQCHkhvnYi3BhJdimj7qatErbkMHw7rCmJPhZLzGDYMOA1GXRKLPtLzXajUymYw5lIxEIqRSqWUHjH5BFjtIst1SZfsXJfjXvvUqm+mitjP8O6drQq6USgCfAt6ltc62fl1rfR9wHzQ98m49bjdYTzz9bwePb/HbwL34j1r8a8lzywAqOZ+Rr0mF7Z/AByzb+ZjNZslkMmbkq9aakucwowa5rJtb4IuNxRe9NipuVzc46GQ5Fi5xXT+MJxwikTBKhYy3ncvljOAGAgGGhoZWXKKw0jwQEWtZvrDa5vbV6JWD8M1YhHaGf+d0RciVUkGaIv4xrfX93bjP7WQz4jm+xRXzXvlHLdW2iLM048gAJNm9KKu8xEcWi0HEW7xv6aCsVCrUPM0cfcy4R7is+1ggiVHtzYq31gyqPEeCBa6KVTmWaBAOuEa0Hcc1h5QyIKqvr89YH8lk0szjlsNL/9RD/3wS/5zudjztrZwV1O0XiM1ahHaGf2d0LOSq+S/yT4Afaq3/Y+eXtP1sRjy3o2LutX/U/sSIHDTK/BL/thx/ZFCmCsr3ikculXYmkzGrz4qlEjmVYNoZZNoZYsbpoyGxwDaq7pgucUhlOB4tcyxcJhZYSpBEQ1Ej0lJlBwIBM0UwFosRjUaNNSKC7xfvQCBghNs/j6VTtuogfCteIPaDRbib6EZFfgvwr4DHlFLSuvWbWuvPd+G+t42NiudeqZjbRVIltVrNxPpkcYJ/5CtgLAM5oPQ8z1TgUo2L1dKaOMnVHabVAFNqlJngEGUVbvuaA7rGAZ1mzMlwJFhgKKwJBFwzUKqvr49kMkks1owLxuNxcxgpc8f9gizvLKSylopc7JKt8re36iB8K14g9qJFuJvpRmrl67RVF/UuvVYxt7LRt9H+5IjYJP6t7VKJBoNBlFLEYjEcxzHzvOWgUj5k7Zd8v3yuWCySL1e5Uk8wpYaZCQyT7WQXpfbo99IcIsMhJ8PRuIfrqMWFCf309/czMDBANBolmUyazDYsn0ciVbf8HGBpwNZ2CHcr7Va56/19d+sFovVxfuYl43z1iZl9WfBsN/tq1opl5bfRd3/qezQaDd580yGTJpFqWaJ+/mYaf8ehWCaVSoVsNmuqdP82eLk/8cqb911hqhZispFi2jnMrOrraBdl3CtwwJvjgF7geLRKXyy0GP/ro79/Sbxl841/prW8y2htnpFRtf6DTPHCd4J2qtyN2CbdsEFWepxPPTyxbxvZthsr5PuM3/vrJyhWqmivjm40wGuQ9xp86FP/xIuHXmb8a//qL8lDS7UqzToLCwtmI7xkuOV7/b63WCoLVcWFapzLepgp3U9VBTvaRTnizXNQz3M4UKA/4hGLxYjHk6RSKQYHBxkaGiKVSpFKpZatZZM8un/GilTfUmX7P3ZLFLAdW28
jtkk3bJD93si201gh38P4dzJKZXxx8nLTEmk0Fre1eGivwUTRo1wuAxjvGDAditVq1bS9y5Jg/3RByYFLg06tViNbqnKxGmOyMcBlL0VO+ZYrbLKodbTHkE5zUC8wygLDbplEX5xEIkEyeYSBgQGGhoY4ePCg2TUpeyllubF0ifp9bVmcvBWHk6vRSUJks7beRmyTbpz77MdGtt2EFfI9gL+tvbUSbp2lMxx1mcqU0F4DaH7dAQ72NVd+ffH7V/ivX/0RUwsFhqOat79kjJceSRg/O5/PG6EWu0Wq22K5wpVqmAvVOJPeAeZ0stlFCW2dovTpPIdUhlGVZoQMyXizcaavb5TBwUEGBwcZHR3lwIEDxueWxqFcLres8pbDy3g8vqwBp5M4YDts97rBjdom671ArPfiY1MqO4sV8h5DhFM+pOKW7Sz+JhT/pnS5/f/yijH+/RefoOR5KMcFNCEH/vmLR/jE3z7GHz30BKVyBeo1Ls7V+Mhzz/AzN4/yoiN9Zpel7G+sVmvMVR2eKYa4WIszpfupK98/qU1qY0RXGHObW3FGVZoozfx2IpFgaOg6Dh48yMjICGNjY8RiMbNswZ+gER9edm427Za4OaD0j3PdCTZjQXQj290N22QjLz42pbKzWCHfxazkN0t6AljWFi6Dk8S/LhaLxhKRyjwQCPCaU8NUKsf5k797ipn0PANhl7fedJBTqTq//v9+m3yxgK7XUV4D7WmqSvGFxya56XDzgDBbaXAuF+B8uTm7pKgiSxe8SW10dYOxQIGj4SLjbpakV0CpprUzODjC8PAwhw8fZnR0lKGhIQKBwPPy5tJYJPNKQqEQyWSSRCJhlkPsppb3jVoQ3arcu2GbbOTFZ7/HcncaK+S7gNaqWX7vt0Vk2JJ/XKkcIkoWW2J9/upcbuO/3fFwnQ/cdhDPGzG+8ezsLAuZ/GKl6uI5HtoJ0qg1SHshPncxwKV6grRKgmqzixLNsFPiRLTEsXCZIZ1BN5rVcyQSYWjoKOPj40a8I5GIScRIV6e/qQiaccBYLEYikSAWiy0bULUb2agF0c3Dw07jsht98en1WG4vY4V8G/E300hlKUIqSJUdjy8tIZCVYzIdMJfLGWEul8vLWt9lXZc/QiiP61/PJR9+j7gvFWZqPkMwGCKUPEio7wDBwUM4bpDHPdoaOpV0qhwLl7gqVmE8WMCtN4dkBZwA0WiMAwcOcPz4cY4fP04ymVz24pTNZpfNW/HP4+7r6zNdljKzpBdGuW7UgthNh4fW/9797Ckh300DhdarskVEJSnhryRlql61WjXerzTiiAcsW8tlNonfgpGvyThUWWwgkwX9c1AKhQKXFwo8XQjRN3oC7/AQbiTe9vMOqQZHI2WOhYpcFasSqefxvOYLTTwQJ9E/zNjYGOPj44yNjS1bfjwzM2MOUCWzLi9AkjCJRqNGvHeLXQIb/7e3UQtiN4mn9b93P3tGyLc7DSD4F/5K1fxXj1zkjx56iiuZEqN9Ed556zW86eYjy6ps/4GbdELKfBHpevS3vgPLtsX4Z3VL1SqT96SJRap/2RCfzWbJ5XLMZfKcL8gG+EHy7vHmk0lsPtat0IyHKxwNFTkaLnLALaFoirCjHBID/aRSKQ4dOsTo6Ch9fX2mbb9YLJq2fmDZuwVp9fcfVMoL0m5js//2NmJB7CbxtP737mfP7Oxcbbdmt9ZFiW3hr3xbEyOO4/DF70/xwS/8iEpDgeui3ADRYID/66dv5M03HTLVtn+jjb/zUURZLAT/4/pjf8CyOJ0kMmRuydzcHPPz882xr9kcF/Mw2Ugy7Q6x4A6gO9hFORSscTRcYtzNMh4sEQsq49vLYKn+/n6GhoYYHBwkHo+b6lpsIPl5SVXtt03khSgSiexowmSjbNW/vd30DtOyO9jKVW+7gm55iiKcrdaIiJC85ZeOR4nBieD8l298j6oKolyN1h5etUyuWOGDn/h7XpB4ibFG5MUAlraYB4NBc4gpzTb
+KYHhcJhUKmUidY7jmAXDV65cYXZ21myJn8rXuaz7mHYGmQ+coBYItf23HXUaHA2XOBzIcyxSIunUfMsSBszvk8mlrkppyJGRtFJt+xcISxEhlpJ/1OtuF28/W+Vn28NDy0bZM0LejqfoF2x/1E8+L2/vRbQlziaiJKLvF/lL03N4tSo06niNKrpeA62ZyHhmkwwseeTyvf4tOTL/+lsXMnzyOzPMVRWj/Unues3VvGqsj3Q6zcTEBHNzc8zNzZHJZJjPFZmsJ5hxh5kNvIBSONn2z9JFMxYqcSRY4HAwz2ioRjgsfnu/mTfSHEIVN3E/sXVkpKv8/PwjX1sPWv1TBntJvP2s9m9P06zWd3Mlbav+vcGeEfK1PEURZhFd/xZ2eavv3zzuj/j5ByvBkvjL76W6llkjg+SZKpbRngcOKO3gaY+ReNi0i/sPHWV2ibxQJBIJ4vE4Dz05z58+OkmxCvVCnucWrvCBc4/yU9emONHnsJDJMlkOckUNMBe4ilxoCMLtH/4NByocCxcZc3MciVRIREKLQj1skjPyriEcDi8T79ZBUvJ7aUry/xz9L4y7Kd/dCWvtde3WWc1WCO5OnStZus+eEfI7bx5Ha829f/1DJhcKHEpF+LVbr+I1J+LMz88vW3IALKsK/ZvI/cLij+u1Jj4ksy22iwjUO155jA9/+UkqNQ/dqFOv1Qg78NYbR1hYWACWBlH19fURj8fNHGylFIVCgampKX7/k18jnU5TL5eolYvUinncUIjPasX4iavJRm6kkQi1/fOKO3WOhgocDhY4GiownGj67NFoP5FIs8lHstj+FxnJbEvkT2wfqcJljZmkb/xWlFTee0G8/ay317XT4VFbJbh20NXeoScPO/0+tgi0CKr/AFJEBDC7Ef07Ev3t7P6qETA+tUT//OvKRIhEfOXxpdPwoe9P8Mmzl5jNlRiOh/n5lx/j1hvGiEajxGKxZUKXz+dZWFggm82Sz+fNgKf3ffwf8eoe4cEDRIaPET1wnEC8r+2fWQCP8WCRw8EcxyNlDkY0yWSz+1EGZIlwy2wS8eLFPgmHw8uEGzDvXiKRiHnB858l+LPw+4ETd3+Olf6LUsCz9/zU8z6/kUp7tcNUaB6orledr/YYm71Wy86zJw47pSHGXwm3ZrOl2pWcsRyurSTcfo9buh79HZCS05aPUCi0bNZJNps1DTkiXFprbnvBOK+/6ZgZ/SqPX6/XyefzzM7OLlsgnE6nm/HAQpELxSBX6OfAS96I2zfatm+staaeX6CaucK/ujHBkWiNRKy5DScYHDIb3cWnFy87Ho+brHYymTQHk3KWID9POeCU7lH5exDxlo+9xEZEdzNnNRuttNc6NF2vOl/rMXZTVt3SGT31X1o2myWbzS4bCuUXS/9cDf/BGrDMD/dvrBHRhqVKX8RKrAPZ7D4/P28EXgZHSTUqiQtYOtTzD3OSj3w+bzoz0+kMk0W4VE8wFxwnFz6Il2r+lbTzF9Mo56mmr1BNz1IvzKO0ZmSgjx87edzEE+X5wVLbfyKRMHE/+Xn6zwbk5yzNS/Lc5GBY7kt8773IRkV3M/nvjVobqwnuWt8j1/yeTz5Ko+Vdt9x+N2XVLZ3RU//VieD4uyBXqlj9SZB6vW6y2v4ooYi23I/fWhEPPJfLLVsULGIl86tFGIVisWhazP37J2VOSD6fZyZf5UIlxpQaYCF8NbVY+9VPkAZjbo6j4SK17Cxf+uFF6ioEgRBOPEk0HOPtLz9sooDyAiW2ifzev0BBIoLyuS98f4b/8JVzTKaLHEqG+PXbruKnTo8Cy+e/9GriZKNsVHQ30zyz0djiWoepq32PvPC0irj/9rbRZ+/QU0Iu282F1g5HsVzkV79XLq3e/jGm0mFYr9fJ5XLk83kj3H5/N5FImJVmYp+IxSMvCOVy2bSay33VajXS+RLP5l0mvRSzgWsoBvvb/qkrNAecZiTweLjE0YQmEY8RjcYJhQa46vAh/vvZSeaLFYa
TYd7+smO85oYxs2dSxFpeCGVDvHzO31jkOA6f/s4lfuvT36NYqYLnMTFX5v2f+T6BQJCfeemxPS/efjaTFd9o/nszs8Jh9cPUlb5npReelW5vs+p7g54SchFs/+wSEW4RWf8hqN9iCYVCy6p0WTwgbfV+MYvH48sO9WSioFgzUoVL047sq6zX65TKFZ7LelyoxrisD5IJDqLD7R/01YsZVH6aN5+McCxWpS8WXvSwU6YaFpvp1hf08boXHl1Wafvz3K0LFWQWi39aoKRzKpUK/+6vvkuxVAbloAJBcFwqOPz+Q8/wsy873o2/0l1Lqx/eHwuyUKw973ad+MmbsTZEcFstntW+Zy1f3done4+eEvJ8Pk82mzVVtlTDIuB+79y/NUfa1sUTl+RJ66Gcf553a5qnuUihagZZ+UfGXloocS4fYKKRYtY9Rt0JQ7C95+hVy1TTU1Qz09RyM6h6FYJhbnndK42t5K+qgWULE+S5+4dyyTsZEXJ/Llzwjx6Q53slV0eFIii1PC6419d3reSHBx1F0FXUGkv/LjoVxHasjU6HbrlK2YXIe5CeEnJ/tQ0YqwQwqZBW4S4Wi8vy4VKd+7s65b7lfqQ7UapxafaRdwDzuRJPzDe4UItzhWFKbqI54rWNeLSLx2igyLFwib//7hOk5+cABxWKQjiBG3UYTsUZGBgwQi7XKS9CrTnuZh48umzM60qdk/6OVhF///qz8eFUz6Qautkws5ItUfM0/dEg8XCgq35yO9ZGJ0O3rIjvTXpKyGEpReFvrfePefUnKWAp6y3rv6Tqbs2Vy1yQer1upg7KC0G+WOJHc1WeKYSY9FJk3LHmLso2HZMht8yRUIETkTJHIlWCjiYYDDJ0+iAff9Sj4QVw3AA4QSKxCL/46pOkUinzItOa1xYbxb8ZZ7Uxr35rar2sd6+kGrrZMPPAIxOr+tCZUo3vvv/1nV3sNmEPMvcXXRFypdTtwEdoStsfa63v6cb9tiLLf1tnc/s7NgGTRhEBB4wAiuj5LZlGo2G2wheLRdLpDM8tlHgq63KxFmfOHafhBNuuuqWL8li4xIlYhUSgaQk1K+c+QqEQ8Xicq6+OcODwYT7+7StM58uMJMK845areP3p8WUzXfyZ776+vmVb41dCct7+SY0biQv2ihh0q0NRXhBWYze+E1kLe5C5f+hYyJVSLvBHwE8Al4BvK6U+o7X+Qaf33crU1BTPPfecETQRYjnwE3tBqkuJ0C1ep7Eh/HO6i8Ui2WyWidkMTyxoLlRjzDhDlN1Y80Hb8LqDyuNwqMSRUJGrEzUG3SquK/59yOS15Vc5XAV43QvivPFFJ4xVpJSiWq2ag9hYLGbEu3XGiR//9Ea/eK8WF1zNmugFMejW9MG1kh678Z2IxSJ0oyJ/GXBOa/0MgFLq48BbgK4LealUolwuL4sQ+lMb0vou/rF/TkqtVjPNONlslum5BZ7JaJ7KB5min6x7TXMXZVuHlJrRUJVjoSInomWOxOoEnKWO0HA4aZpq+vr6lnnWMltcfHlYilWK8PrFWzzyFa9i8Xn618dJ9HAt8Z5Il1Bg2rV7bXhStzoU1xJ+6y1bdjPdEPJx4KLvz5eAl3fhfp9HMpk0beP+Ls5QKLRM1AGzysxsxZmb53y6yrl8kEv1OPPucTwVaPsn0B+ocyxS4nikzLFIhb6oCKWD40TMO4N4PE4qlVo2X0V8e38jkj91E4lEjG0i885Xq7z9w7xEvGXBw1qjYVt95da2kV4antQtL3+1F4Tx/mhP/Bws+5duCPlKSvG8djKl1F3AXQBHjx5t64Gi0ShDQ0NG8MTj1VpTLpdNtZ3JZMjn81yYyXAuF+BiPc60GqXqLDYTtfGsI47H0Uhz1OvViRrDkWbsz/MclGraMHJdsVjMLIAQn16u0+/Pw5JXHYlEiMViZjjVWiLcusRZHlsq/Y1MF1yvYQR6J2bYLS+/Vw53LZZWuiHkl4Ajvj8fBiZbb6S1vg+4D5rTD9t
5oHg8TrVaBTBb1nO5nNl1eXlmjvOFIM+WwlzR/eTcseY3tpEucdAcjtY5evPzLAAAC85JREFUHmlugD8S14RDQVw3iNZLlb+IsoykjcfjpnFIrJVIJEKj0TC2iMxlEfGWuOB687nXiwtuho2I9Fh/tONY328/8Bh/+c2LNLTGVYpfePkRPnjn6U1d60bohpffK4e7Fksr3RDybwPXKKVOABPA24F/3oX7fR6ZTIYLFy6YrswrU9M8m6lzoRJjspFizjnVjAW2Oe56OFTnqniVE9EKVyc9YiF3sTKOL4v7iT3iX64gtoZU3yLIUlmLRSLNOSLk61XQm4kLbob1BjFFgy63XjfSUazvtx94jL/4xoWl56K1+fNWiHk36IXDXYullY6FXGtdV0r9CvBFmrXvR7XW3+/4ylbgmWee4W/PPs75coSL1ThXOE5NLS5XaEPTYq7HyXiNk4ka1yQbDIRZ9NujJm/uF2/Jaov9IZFAyaX79276Z5mIBSTDtsQvXw3/6jcRbxl41a3pgu99wyne/YnvrjiPWrr/Oo31/eU3L676+d0q5J1iV6dZdoKuqILW+vPA57txXyvx1Sem+ZsnpvjCdyrMVa9rfrKNeU0BpTkaq3Eqpbkm1eBoykWhCQSa3rm04YuQiqedSqVIpVLP27HpP1T0R/ukIvf73+uJ8GpxQf9i525y583jnH1uno9948IyMfd3/737E99d8Xs36p2vNnlvtc/3Gq2ifet1I3zq4Qm7Os2y7fREZ+ef/dN5vvbkDO2U3aPhOtckG1w/qLh2wCWy6FP7B2xJ5Su/hsNhRkZGiMfjRrxlY5B/SqDk1P1zS1oXL6yXONlMXLDbfPDO05w5NrhqBdlprM9VakXRVjS33vRy1bpSN2nriyL0VvrH0rv0hJD/+NXDi0K+PqmAx9XJOtcPwI0jIQaiYQDTgi/doDJ4S5puIpEIQ0NDRCIRXNc1t6nVakaUZdGw2Cd+8RX/e7353CvFBWU+yk5skl/LE+40xfELLz+yzCMXHEeZF4herVpXsp1We5/RK+kfS+/SE0L+z64ZAX644tdCjuaqRIPr+uHG4QDHBiLLstkyRKtarZoooDTexONxs1BYBk9JnE86LmXHZjAYXOaZS+Uuvvd6cUF/4gSWst67eZN8pykO8cH9qZVI0KFQXd137xWPeTPi3Gut/ZbeoyeE/NqDCQ4kw0znKig0h2Me1w8qXjDkcv1IhFDANR2ckimvVCpmtoj42uJ3S2Utt5f5JZLjTiQSJiron6go4iuWynqJE3/l7Y8L9tIm+U5THB+88/Syg80Td39uxdtNpktbti1+K1jNdvJ3yILNoVu2h54QcqUU733DKTKzUxwgzWAismxbj2yjlzniItzQjP/19/cbe0Q6KGVqoHRQyhYgsVPkvqTRRirv9TLbWxUX3Cus5bt3a/jVdrCa7fQzLxnnq0/M7Pp3FJa9RU8IOcDbzhzh2WfrTE4uLT8uFovLlgCLSPuz2v6ZK0opEokEiUSCwcFBwuGwSar4PWvAbNkR8V4rcbJSXHCvbpLvlLV8905TMtuJbR6y7CZ6SmXy+TwzMzMmQSLWSCAQMNME/YeNIuDRaJT+/n4GBwfNNMRyuWz2bLZWzv5I4WqsNRp2taXQlrUFcLWdlKt5zDvtp9vmIctuoaeEvFwuUygUAIzdIeLtH0AVCARIJpP09fUxMDBghlXJJnt/C71UzX7x3sxo2O2MC+4VVhPAzaRkeslPt1i2mp4Sctlon0gkTOQPMPHBVCplxr2K3y0zx2XBBCzF/fydl70SF9zLbMau6CU/3WLZanpKyJPJJENDQ8BSJdzf37/ssFKy4tVq1dgmcnv/dvm1Yn+9GhfcC2zUrujWMol22Wlbx2Lx01NCHgwGGRwcJJFImJigbNIR8RbbQ2wTEfCNTAlcaZP8bo0L7nch6dYyiXawto5lt9FTQj42NmaWL9frdcrlshFvOfiUSl2q7/VifyuNht3I9+0
k+1VI/C9efdEgQVdRayyltmVi41a3/1tbx7Lb6Ckhh+YCZkmbSJu9v/KWj70cF9yPQtL64pUu1Qg6ioFYkHSxtq1Dq3ba1rFYWtn9quWjWCySz+dNJlzsEv/UwbUSJ+1skt+N7DUh2YhNtNKLV83TxEIBHvmd1wPNQVzb8QK3k7aOxbISPaVgkhYRAW4nLrjWJvleYS8JyUZtovVevB54ZGLVRRndfoGzK+Esu43ddYK3DuFw2Cx2iMViK8b/tNZUq1WKxSKFQoFKpbK4yT5stvn0emzwvW84RTS43L/vVSFZyybys9qLlKyjE/Ff7TbQFPtb7nmIE3d/jlvueYgHHplo65rvvHmcD731NOP9URTN5cwyw91i2Ql6qiJfzQLZb3HBvdQevlGbaK0qeK1F0nKbbh8Q265Oy26ip4S8lV6KC3abvSIkG7WJ1nrxWm1GC2Aq5e3yzy2WnaDnhLybm+QtO89m/ObVXrxWezEY74+a2++1A2KLxU9Pla3lcplSqUS9XicQCBCLxUgkEoTDYSviPUo3/OaNnBms5bFbLL1OT1XkklLptbigZW06tYk2cmZgkyaWvUxPKaKtui2rsd6LwV46ILZYWukpIbdYOmGvHBBbLK1YIbfsa/b78DHL3qCjw06l1L1KqSeUUt9TSn1aKdXfrQuzWLYayZZPpEtolrLl7TYKWSw7RaeplS8DN2qtXwj8CHhf55dksWwPG+0qtVh2Ox0Judb6S1rr+uIfvwEc7vySLJbtwWbLLXuFbubIfwn4Qhfvz2LZUmy23LJXWFfIlVJfUUo9vsLHW3y3+S2gDnxsjfu5Syl1Vil1dmZmpjtXb7F0wF4aPmbZ36ybWtFav26tryul3gHcAbxWy4LMle/nPuA+gDNnzqx6O4tlu7DZcsteoaP4oVLqduA3gFdrrYvduSSLZWN0Izpos+WWvUCnOfI/BMLAlxfne39Da/2/dnxVFss67Ne9pRbLSnQk5Frrq7t1IRbLZtiPe0stltXoqemHFotgo4MWyxJWyC09iY0OWixLWCG39CQ2OmixLGGHZll6EhsdtFiWsEJu6VlsdNBiaWKtFYvFYulxrJBbLBZLj2OF3GKxWHocK+QWi8XS41ght1gslh5HrTGwcOseVKkZ4Lltf+D2GQZmd/oitpn9+JzBPu/9RC8+52Na65HWT+6IkPcaSqmzWuszO30d28l+fM5gn/dOX8d2spees7VWLBaLpcexQm6xWCw9jhXyjXHfTl/ADrAfnzPY572f2DPP2XrkFovF0uPYitxisVh6HCvkG0Apda9S6gml1PeUUp9WSvXv9DVtB0qptymlvq+U8pRSe+J0fzWUUrcrpZ5USp1TSt2909ezXSilPqqUmlZKPb7T17JdKKWOKKW+qpT64eK/71/f6WvqFCvkG+PLwI1a6xcCPwLet8PXs108DrwV+LudvpCtRCnlAn8EvBG4AfgFpdQNO3tV28b/A9y+0xexzdSB92itrwdeAfybXv/7tkK+AbTWX9Ja1xf/+A3g8E5ez3ahtf6h1vrJnb6ObeBlwDmt9TNa6yrwceAtO3xN24LW+u+A+Z2+ju1Ea31Za/2dxd/ngB8CPT0P2Qr55vkl4As7fRGWrjIOXPT9+RI9/h+2ZWMopY4DNwPf3Nkr6Qy7WGIRpdRXgNEVvvRbWusHF2/zWzTfln1sO69tK9nI894HqBU+Z+NcexylVAL4FPAurXV2p6+nE6yQL6K1ft1aX1dKvQO4A3it3kOZzfWe9z7hEnDE9+fDwOQOXYtlG1BKBWmK+Me01vfv9PV0irVWNoBS6nbgN4A3a62LO309lq7zbeAapdQJpVQIeDvwmR2+pv+/fTu2TSAIoyA8rwDqoAaLjNAZBRA5pRYiByTESM4JThRAeolFA/SxBKYAJHO3+qX5KnjJjlZarSaSJMARuLXW9r33vIMhf803sAAuScYkh96D5pBkk+QOfADnJEPvTVN4PmTvgIG/h6+f1tp
v31XzSHICrsAyyT3JV+9NM1gBW2D9PM9jks/eo/7Dn52SVJw3ckkqzpBLUnGGXJKKM+SSVJwhl6TiDLkkFWfIJak4Qy5JxT0AGiHMHEIgdh8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ys = sample_predictions(LinearRegression(), X)\n", + "plt.plot(X, ys.T, color='k', alpha=0.05);\n", + "plt.scatter(X, y)\n", + "plt.plot(X, expected_prediction(LinearRegression(), X), lw=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Podemos ver que si bien con distintos samples de los datos se obtienen diferentes modelos, son todos bastante parecidos entre si, y hacen predicciones similares (baja varianza)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cambiando regresion lineal por arbol de decisión" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2f5d3662750443689a43ec0e0d7e6666", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(IntSlider(value=1, description='min_samples_leaf', max=50, min=1), Output()), _dom_class…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeRegressor\n", + "from ipywidgets import interact, widgets\n", + "\n", + "def draw(min_samples_leaf):\n", + " dt = DecisionTreeRegressor(min_samples_leaf=min_samples_leaf).fit(X, y)\n", + "\n", + " plt.plot(X, dt.predict(X), '--k')\n", + " plt.scatter(X, y)\n", + " \n", + "interact(\n", + " draw, \n", + " min_samples_leaf=widgets.IntSlider(min=1, max=50, step=1, value=1)\n", + ");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Que pasa si lo miramos con observaciones fuera del rango entrenado?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeRegressor\n", + "\n", + "plt.plot(oos_X, dt.predict(oos_X), '--k')\n", + "plt.scatter(oos_X, oos_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Podemos ver como se ajusta a la perfeccion a los datos\n", + "\n", + "# Haciendo muchas predicciones" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ys = sample_predictions(DecisionTreeRegressor(), X)\n", + "plt.plot(X, ys.T, color='k', alpha=0.05);\n", + "plt.scatter(X, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Podemos ver que al tomar muchas muestras los modelos terminan cubriendo la zona de alta densidad de bolitas\n", + "\n", + "Podemos ver que el modelo esperado se parece mucho a la funcion que genera los datos" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ys = sample_predictions(DecisionTreeRegressor(), X)\n", + "plt.plot(X, ys.T, color='k', alpha=0.05);\n", + "plt.scatter(X, y)\n", + "plt.plot(X, expected_prediction(DecisionTreeRegressor(), X), lw=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Podemos ver que en esperanza el modelo le pega muy bien; sin embargo, en cada realización del dataset el modelo individual varía mucho (alta varianza)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Como se ve con observaciones out of sample?" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ys = sample_predictions(DecisionTreeRegressor(), oos_X)\n", + "plt.plot(oos_X, ys.T, color='k', alpha=0.05);\n", + "plt.scatter(X, y)\n", + "plt.plot(oos_X, expected_prediction(DecisionTreeRegressor(), oos_X), lw=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculando sesgo y varianza" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![image.png](attachment:image.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Recordar que las esperanzas de la segunda ecuación son sobre las variables que están en la expresión del MSE.\n", + "\n", + "En este caso, por construcción **sabemos** el valor de `E[f(x)]`.\n", + "Es decir `E[f(x)] = x^2 + x` (ver función sample_data)\n", + "\n", + "Con eso podemos calcular el sesgo y la varianza para este caso" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [], + "source": [ + "def bias(model, x, y, iters=1000):\n", + " ideal_y = (x**2 + x).squeeze()\n", + " errors = (\n", + " ideal_y - expected_prediction(model, x, iters=iters)\n", + " ) ** 2\n", + " return errors.mean()\n", + "\n", + "def variance(model, x, y, iters=1000):\n", + " ideal_y = (x**2 + x).squeeze()\n", + " # repetimos la esperanza en `iters` filas para facilitar el computo\n", + " ideal_y = np.repeat(ideal_y.reshape((1, -1)), iters, axis=0)\n", + " \n", + " # sampleamos iters predicciones de modelos\n", + " ys_hat = sample_predictions(model, x, iters=iters)\n", + " \n", + " # Estimamos la varianza por cada par (x_i, y_i) del dataset, luego promediamos las varianzas\n", + " return np.var(ideal_y - ys_hat, axis=0).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "De esta forma podemos calcular el sesgo y la varianza de una
regresion lineal" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.393858748727712" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bias(\n", + " LinearRegression(), \n", + " *sample_data(1000, x_matrix=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.10906147472344568" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variance(\n", + " LinearRegression(), \n", + " *sample_data(1000, x_matrix=True)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Que pasa si jugamos con regularizacion L2?\n", + "\n", + "A la regresion con regularizacion L2 se le suele llamar `Ridge`. El parámetro lambda en scikit-learn se llama alpha (a mas grande, mayor regularizacion)" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.408022545024507" + ] + }, + "execution_count": 181, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import Ridge\n", + "\n", + "bias(\n", + " Ridge(alpha=500), \n", + " *sample_data(1000, x_matrix=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.05763412973111156" + ] + }, + "execution_count": 182, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variance(\n", + " Ridge(alpha=500), \n", + " *sample_data(1000, x_matrix=True)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Y podemos calcular el sesgo y la varianza de un decision tree" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "0.0012359837781728128" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bias(\n", + " DecisionTreeRegressor(min_samples_leaf=1), \n", + " *sample_data(1000, x_matrix=True), \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.014141026848157" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variance(\n", + " DecisionTreeRegressor(min_samples_leaf=1), \n", + " *sample_data(1000, x_matrix=True), \n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Se puede ver que el decision tree tiene **muchisimo menos sesgo**, pero tiene una varianza 10 veces mas alta.\n", + "\n", + "### Qué pasa si utilizamos min_samples_leaf = 10?" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12437374093975646" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bias(\n", + " DecisionTreeRegressor(min_samples_leaf=10), \n", + " *sample_data(1000, x_matrix=True), \n", + " iters=1000\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.4022126724472044" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variance(\n", + " DecisionTreeRegressor(min_samples_leaf=10), \n", + " *sample_data(1000, x_matrix=True), \n", + " iters=1000\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "65c671c52ea64930bc656c43a45dbb75", + "version_major": 2, + "version_minor": 0 + }, + 
"text/plain": [ + "interactive(children=(IntSlider(value=10, description='min_samples_leaf', max=50, min=1), Output()), _dom_clas…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from ipywidgets import interact, widgets\n", + "\n", + "def draw_predictions(min_samples_leaf):\n", + " ys = sample_predictions(\n", + " DecisionTreeRegressor(min_samples_leaf=min_samples_leaf), \n", + " oos_X,\n", + " iters=100\n", + " )\n", + " plt.plot(oos_X, ys.T, color='k', alpha=0.05);\n", + " plt.scatter(X, y)\n", + " plt.plot(\n", + " oos_X, \n", + " expected_prediction(\n", + " DecisionTreeRegressor(min_samples_leaf=min_samples_leaf), \n", + " oos_X\n", + " ), \n", + " lw=5\n", + " )\n", + " plt.xlim(-5, 5)\n", + " plt.ylim(-4, 12)\n", + " \n", + "\n", + "interact(\n", + " draw_predictions, \n", + " min_samples_leaf=widgets.IntSlider(min=1, max=50, step=1, value=10)\n", + ");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Un modelo con el sesgo perfecto" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "# Ajustamos un modelo cuadrático\n", + "model = make_pipeline(PolynomialFeatures(), LinearRegression())" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ys = sample_predictions(model, oos_X)\n", + "plt.plot(oos_X, ys.T, color='k', alpha=0.05);\n", + "plt.scatter(X, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En este caso, al elegir el sesgo correcto, el modelo generaliza inclusive a datos nunca vistos de forma correcta.\n", + "\n", + "\n", + "En este caso tenemos un modelo de bajo sesgo y baja varianza (algo muy raro en la práctica). \n", + "\n", + "Si bien este es un ejemplo de juguete, sirve para ejemplificar como un sesgo introducido de forma correcta al problema puede ser muy beneficioso en la calidad del modelo final" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.344946751807706e-05" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bias(\n", + " model,\n", + " *sample_data(1000, x_matrix=True), \n", + " iters=1000\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.029742082291003166" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variance(\n", + " model,\n", + " *sample_data(1000, x_matrix=True), \n", + " iters=1000\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}