{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hybrid Recommenders\n",
    "\n",
    "This notebook combines a content-based recommender with a collaborative filter. `hybrid` shortlists the movies most similar to a given title using a precomputed cosine similarity matrix, then ranks that shortlist by the rating an SVD model predicts for a given user."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from surprise import SVD, Reader, Dataset\n",
    "\n",
    "#Seed for the SVD initialisation, so that re-running the notebook gives repeatable predictions\n",
    "SEED = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the content-based similarity data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Import or compute the cosine_sim matrix.\n",
    "#Column labels come from the CSV header, so hybrid() addresses a column as str(idx)\n",
    "cosine_sim = pd.read_csv('../data/cosine_sim.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Import or compute the cosine sim mapping matrix and convert it into a Pandas Series:\n",
    "#column 0 of the headerless file becomes the index and column 1 the values,\n",
    "#so cosine_sim_map[title] is the cosine_sim index of that movie\n",
    "cosine_sim_map = (\n",
    "    pd.read_csv('../data/cosine_sim_map.csv', header=None)\n",
    "    .set_index(0)[1]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train the collaborative filter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Build the SVD based Collaborative filter\n",
    "#NOTE(review): Reader() keeps Surprise's default rating scale - confirm it matches ratings_small.csv\n",
    "reader = Reader()\n",
    "ratings = pd.read_csv('../data/ratings_small.csv')\n",
    "data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)\n",
    "\n",
    "#Train on every available rating. No folds are created because the model is not\n",
    "#cross-validated in this notebook (the old data.split()/svd.train() calls were\n",
    "#removed from Surprise; fit() is the supported replacement)\n",
    "trainset = data.build_full_trainset()\n",
    "svd = SVD(random_state=SEED)\n",
    "svd.fit(trainset);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the ID mappings and movie metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Build title to ID and ID to title mappings.\n",
    "#id_to_title is indexed by 'id' and is what hybrid() uses to translate a movie's\n",
    "#'id' into the 'movieId' that the ratings (and therefore svd) are keyed on\n",
    "id_map = pd.read_csv('../data/movie_ids.csv')\n",
    "id_to_title = id_map.set_index('id')\n",
    "title_to_id = id_map.set_index('title')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Import or compute relevant metadata of the movies\n",
    "smd = pd.read_csv('../data/metadata_small.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hybrid recommender"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def hybrid(userId: int, title: str, n_candidates: int = 25, top_n: int = 10) -> pd.DataFrame:\n",
    "    \"\"\"Recommend movies similar to `title`, ranked by the rating predicted for `userId`.\n",
    "\n",
    "    The `n_candidates` movies with the highest cosine similarity to `title` are\n",
    "    shortlisted, the SVD model predicts the rating `userId` would give each of\n",
    "    them, and the `top_n` highest predictions are returned.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    userId : int\n",
    "        Raw user id handed to svd.predict; presumably a userId value from\n",
    "        ratings_small.csv.\n",
    "    title : str\n",
    "        Movie title; must be a label of cosine_sim_map.\n",
    "    n_candidates : int, default 25\n",
    "        Size of the content-based shortlist.\n",
    "    top_n : int, default 10\n",
    "        Number of recommendations returned.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The selected rows of smd with the columns title, vote_count,\n",
    "        vote_average, year and id, plus est (the predicted rating), sorted by\n",
    "        est in decreasing order.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If `title` is not in cosine_sim_map, or the id of a shortlisted movie\n",
    "        is not in id_to_title.\n",
    "    \"\"\"\n",
    "    #Extract the cosine_sim index of the movie\n",
    "    idx = int(cosine_sim_map[title])\n",
    "\n",
    "    #Pair every movie index with its similarity to the requested movie, leaving the\n",
    "    #requested movie itself out explicitly. Relying on it being first after sorting\n",
    "    #breaks when another movie ties with it for the top score.\n",
    "    sim_scores = [\n",
    "        (i, score)\n",
    "        for i, score in enumerate(cosine_sim[str(idx)])\n",
    "        if i != idx\n",
    "    ]\n",
    "\n",
    "    #Sort the (index, score) tuples in decreasing order of similarity scores\n",
    "    #(list.sort is stable, so tied movies keep their original relative order)\n",
    "    sim_scores.sort(key=lambda pair: pair[1], reverse=True)\n",
    "\n",
    "    #Store the cosine_sim indices of the most similar movies in a list\n",
    "    movie_indices = [i for i, _ in sim_scores[:n_candidates]]\n",
    "\n",
    "    #Extract the metadata of the aforementioned movies. The explicit copy makes\n",
    "    #adding the est column below safe from SettingWithCopyWarning.\n",
    "    columns = ['title', 'vote_count', 'vote_average', 'year', 'id']\n",
    "    movies = smd.iloc[movie_indices][columns].copy()\n",
    "\n",
    "    #Compute the predicted ratings using the SVD filter. Each movie's id is first\n",
    "    #translated to the movieId used by the ratings data.\n",
    "    movies['est'] = movies['id'].apply(\n",
    "        lambda x: svd.predict(userId, id_to_title.loc[x, 'movieId']).est\n",
    "    )\n",
    "\n",
    "    #Sort the movies in decreasing order of predicted rating and\n",
    "    #return the best top_n as recommendations\n",
    "    return movies.sort_values('est', ascending=False).head(top_n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlevote_countvote_averageyearidest
1011The Terminator4208.07.419842183.140748
974Aliens3282.07.719866793.126947
8401Star Trek Into Darkness4479.07.42013541383.079551
7705Alice in Wonderland8.05.41933256943.054995
3060Sinbad and the Eye of the Tiger39.06.31977119403.028386
8658X-Men: Days of Future Past6155.07.520141275852.997411
2014Fantastic Planet140.07.61973163062.957614
522Terminator 2: Judgment Day4274.07.719912802.914548
1621Darby O'Gill and the Little People35.06.71959188872.844940
1668Return from Witch Mountain38.05.61978148222.804012
\n", "
" ], "text/plain": [ " title vote_count vote_average year \\\n", "1011 The Terminator 4208.0 7.4 1984 \n", "974 Aliens 3282.0 7.7 1986 \n", "8401 Star Trek Into Darkness 4479.0 7.4 2013 \n", "7705 Alice in Wonderland 8.0 5.4 1933 \n", "3060 Sinbad and the Eye of the Tiger 39.0 6.3 1977 \n", "8658 X-Men: Days of Future Past 6155.0 7.5 2014 \n", "2014 Fantastic Planet 140.0 7.6 1973 \n", "522 Terminator 2: Judgment Day 4274.0 7.7 1991 \n", "1621 Darby O'Gill and the Little People 35.0 6.7 1959 \n", "1668 Return from Witch Mountain 38.0 5.6 1978 \n", "\n", " id est \n", "1011 218 3.140748 \n", "974 679 3.126947 \n", "8401 54138 3.079551 \n", "7705 25694 3.054995 \n", "3060 11940 3.028386 \n", "8658 127585 2.997411 \n", "2014 16306 2.957614 \n", "522 280 2.914548 \n", "1621 18887 2.844940 \n", "1668 14822 2.804012 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hybrid(1, 'Avatar')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlevote_countvote_averageyearidest
522Terminator 2: Judgment Day4274.07.719912803.943639
2834Predator2129.07.319871063.866272
8401Star Trek Into Darkness4479.07.42013541383.858491
1011The Terminator4208.07.419842183.856029
7705Alice in Wonderland8.05.41933256943.701565
922The Abyss822.07.1198927563.676465
974Aliens3282.07.719866793.672303
1621Darby O'Gill and the Little People35.06.71959188873.628234
1668Return from Witch Mountain38.05.61978148223.614118
2014Fantastic Planet140.07.61973163063.602051
\n", "
" ], "text/plain": [ " title vote_count vote_average year \\\n", "522 Terminator 2: Judgment Day 4274.0 7.7 1991 \n", "2834 Predator 2129.0 7.3 1987 \n", "8401 Star Trek Into Darkness 4479.0 7.4 2013 \n", "1011 The Terminator 4208.0 7.4 1984 \n", "7705 Alice in Wonderland 8.0 5.4 1933 \n", "922 The Abyss 822.0 7.1 1989 \n", "974 Aliens 3282.0 7.7 1986 \n", "1621 Darby O'Gill and the Little People 35.0 6.7 1959 \n", "1668 Return from Witch Mountain 38.0 5.6 1978 \n", "2014 Fantastic Planet 140.0 7.6 1973 \n", "\n", " id est \n", "522 280 3.943639 \n", "2834 106 3.866272 \n", "8401 54138 3.858491 \n", "1011 218 3.856029 \n", "7705 25694 3.701565 \n", "922 2756 3.676465 \n", "974 679 3.672303 \n", "1621 18887 3.628234 \n", "1668 14822 3.614118 \n", "2014 16306 3.602051 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hybrid(2, 'Avatar')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }