tum
/
rec-code


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038
							{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Collaborative Filtering\n",
    "\n",
    "## The Framework"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>85711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>53</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>94043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>23</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>32067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>43537</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>33</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>15213</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  age sex  occupation zip_code\n",
       "0        1   24   M  technician    85711\n",
       "1        2   53   F       other    94043\n",
       "2        3   23   M      writer    32067\n",
       "3        4   24   M  technician    43537\n",
       "4        5   33   F       other    15213"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Load the u.user file into a dataframe\n",
    "u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']\n",
    "\n",
    "users = pd.read_csv('../data/movielens/u.user', sep='|', names=u_cols,\n",
    " encoding='latin-1')\n",
    "\n",
    "users.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>release date</th>\n",
       "      <th>video release date</th>\n",
       "      <th>IMDb URL</th>\n",
       "      <th>unknown</th>\n",
       "      <th>Action</th>\n",
       "      <th>Adventure</th>\n",
       "      <th>Animation</th>\n",
       "      <th>Children's</th>\n",
       "      <th>...</th>\n",
       "      <th>Fantasy</th>\n",
       "      <th>Film-Noir</th>\n",
       "      <th>Horror</th>\n",
       "      <th>Musical</th>\n",
       "      <th>Mystery</th>\n",
       "      <th>Romance</th>\n",
       "      <th>Sci-Fi</th>\n",
       "      <th>Thriller</th>\n",
       "      <th>War</th>\n",
       "      <th>Western</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>01-Jan-1995</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://us.imdb.com/M/title-exact?Toy%20Story%2...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>GoldenEye (1995)</td>\n",
       "      <td>01-Jan-1995</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://us.imdb.com/M/title-exact?GoldenEye%20(...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Four Rooms (1995)</td>\n",
       "      <td>01-Jan-1995</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://us.imdb.com/M/title-exact?Four%20Rooms%...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Get Shorty (1995)</td>\n",
       "      <td>01-Jan-1995</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://us.imdb.com/M/title-exact?Get%20Shorty%...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Copycat (1995)</td>\n",
       "      <td>01-Jan-1995</td>\n",
       "      <td>NaN</td>\n",
       "      <td>http://us.imdb.com/M/title-exact?Copycat%20(1995)</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id              title release date  video release date  \\\n",
       "0         1   Toy Story (1995)  01-Jan-1995                 NaN   \n",
       "1         2   GoldenEye (1995)  01-Jan-1995                 NaN   \n",
       "2         3  Four Rooms (1995)  01-Jan-1995                 NaN   \n",
       "3         4  Get Shorty (1995)  01-Jan-1995                 NaN   \n",
       "4         5     Copycat (1995)  01-Jan-1995                 NaN   \n",
       "\n",
       "                                            IMDb URL  unknown  Action  \\\n",
       "0  http://us.imdb.com/M/title-exact?Toy%20Story%2...        0       0   \n",
       "1  http://us.imdb.com/M/title-exact?GoldenEye%20(...        0       1   \n",
       "2  http://us.imdb.com/M/title-exact?Four%20Rooms%...        0       0   \n",
       "3  http://us.imdb.com/M/title-exact?Get%20Shorty%...        0       1   \n",
       "4  http://us.imdb.com/M/title-exact?Copycat%20(1995)        0       0   \n",
       "\n",
       "   Adventure  Animation  Children's   ...     Fantasy  Film-Noir  Horror  \\\n",
       "0          0          1           1   ...           0          0       0   \n",
       "1          1          0           0   ...           0          0       0   \n",
       "2          0          0           0   ...           0          0       0   \n",
       "3          0          0           0   ...           0          0       0   \n",
       "4          0          0           0   ...           0          0       0   \n",
       "\n",
       "   Musical  Mystery  Romance  Sci-Fi  Thriller  War  Western  \n",
       "0        0        0        0       0         0    0        0  \n",
       "1        0        0        0       0         1    0        0  \n",
       "2        0        0        0       0         1    0        0  \n",
       "3        0        0        0       0         0    0        0  \n",
       "4        0        0        0       0         1    0        0  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Load the u.item file into a dataframe\n",
    "i_cols = ['movie_id', 'title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',\n",
    " 'Animation', 'Children\\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',\n",
    " 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']\n",
    "\n",
    "movies = pd.read_csv('../data/movielens/u.item', sep='|', names=i_cols, encoding='latin-1')\n",
    "\n",
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Remove all information except Movie ID and title\n",
    "movies = movies[['movie_id', 'title']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>196</td>\n",
       "      <td>242</td>\n",
       "      <td>3</td>\n",
       "      <td>881250949</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>186</td>\n",
       "      <td>302</td>\n",
       "      <td>3</td>\n",
       "      <td>891717742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>22</td>\n",
       "      <td>377</td>\n",
       "      <td>1</td>\n",
       "      <td>878887116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>244</td>\n",
       "      <td>51</td>\n",
       "      <td>2</td>\n",
       "      <td>880606923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>166</td>\n",
       "      <td>346</td>\n",
       "      <td>1</td>\n",
       "      <td>886397596</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  movie_id  rating  timestamp\n",
       "0      196       242       3  881250949\n",
       "1      186       302       3  891717742\n",
       "2       22       377       1  878887116\n",
       "3      244        51       2  880606923\n",
       "4      166       346       1  886397596"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Load the u.data file into a dataframe\n",
    "r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']\n",
    "\n",
    "ratings = pd.read_csv('../data/movielens/u.data', sep='\\t', names=r_cols,\n",
    " encoding='latin-1')\n",
    "\n",
    "ratings.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Drop the timestamp column\n",
    "ratings = ratings.drop('timestamp', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Import the train_test_split function\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "#Assign X as the original ratings dataframe and y as the user_id column of ratings.\n",
    "X = ratings.copy()\n",
    "y = ratings['user_id']\n",
    "\n",
    "#Split into training and test datasets, stratified along user_id\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, stratify=y, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Import the mean_squared_error function\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "#Function that computes the root mean squared error (or RMSE)\n",
    "def rmse(y_true, y_pred):\n",
    "    return np.sqrt(mean_squared_error(y_true, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Define the baseline model to always return 3.\n",
    "def baseline(user_id, movie_id):\n",
    "    return 3.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Function to compute the RMSE score obtained on the testing set by a model\n",
    "def score(cf_model):\n",
    "    \n",
    "    #Construct a list of user-movie tuples from the testing dataset\n",
    "    id_pairs = zip(X_test['user_id'], X_test['movie_id'])\n",
    "    \n",
    "    #Predict the rating for every user-movie tuple\n",
    "    y_pred = np.array([cf_model(user, movie) for (user, movie) in id_pairs])\n",
    "    \n",
    "    #Extract the actual ratings given by the users in the test data\n",
    "    y_true = np.array(X_test['rating'])\n",
    "    \n",
    "    #Return the final RMSE score\n",
    "    return rmse(y_true, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.2470926188539486"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score(baseline)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## User Based Collaborative Filtering\n",
    "\n",
    "### Ratings Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>movie_id</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>1669</th>\n",
       "      <th>1670</th>\n",
       "      <th>1671</th>\n",
       "      <th>1673</th>\n",
       "      <th>1674</th>\n",
       "      <th>1675</th>\n",
       "      <th>1676</th>\n",
       "      <th>1679</th>\n",
       "      <th>1681</th>\n",
       "      <th>1682</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 1647 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "movie_id  1     2     3     4     5     6     7     8     9     10    ...   \\\n",
       "user_id                                                               ...    \n",
       "1          5.0   3.0   4.0   3.0   3.0   5.0   4.0   1.0   5.0   3.0  ...    \n",
       "2          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   2.0  ...    \n",
       "3          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...    \n",
       "4          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...    \n",
       "5          NaN   3.0   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...    \n",
       "\n",
       "movie_id  1669  1670  1671  1673  1674  1675  1676  1679  1681  1682  \n",
       "user_id                                                               \n",
       "1          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  \n",
       "2          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  \n",
       "3          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  \n",
       "4          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  \n",
       "5          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  \n",
       "\n",
       "[5 rows x 1647 columns]"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Build the ratings matrix using pivot_table function\n",
    "r_matrix = X_train.pivot_table(values='rating', index='user_id', columns='movie_id')\n",
    "\n",
    "r_matrix.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#User Based Collaborative Filter using Mean Ratings\n",
    "def cf_user_mean(user_id, movie_id):\n",
    "    \n",
    "    #Check if movie_id exists in r_matrix\n",
    "    if movie_id in r_matrix:\n",
    "        #Compute the mean of all the ratings given to the movie\n",
    "        mean_rating = r_matrix[movie_id].mean()\n",
    "    \n",
    "    else:\n",
    "        #Default to a rating of 3.0 in the absence of any information\n",
    "        mean_rating = 3.0\n",
    "    \n",
    "    return mean_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0234701463131335"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Compute RMSE for the Mean model\n",
    "score(cf_user_mean)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Weighted Mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Create a dummy ratings matrix with all null values imputed to 0\n",
    "r_matrix_dummy = r_matrix.copy().fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Import cosine_score \n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "\n",
    "#Compute the cosine similarity matrix using the dummy ratings matrix\n",
    "cosine_sim = cosine_similarity(r_matrix_dummy, r_matrix_dummy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>user_id</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>934</th>\n",
       "      <th>935</th>\n",
       "      <th>936</th>\n",
       "      <th>937</th>\n",
       "      <th>938</th>\n",
       "      <th>939</th>\n",
       "      <th>940</th>\n",
       "      <th>941</th>\n",
       "      <th>942</th>\n",
       "      <th>943</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.118076</td>\n",
       "      <td>0.029097</td>\n",
       "      <td>0.011628</td>\n",
       "      <td>0.264677</td>\n",
       "      <td>0.312419</td>\n",
       "      <td>0.308729</td>\n",
       "      <td>0.224269</td>\n",
       "      <td>0.026017</td>\n",
       "      <td>0.286411</td>\n",
       "      <td>...</td>\n",
       "      <td>0.308475</td>\n",
       "      <td>0.055872</td>\n",
       "      <td>0.197862</td>\n",
       "      <td>0.131367</td>\n",
       "      <td>0.152449</td>\n",
       "      <td>0.084456</td>\n",
       "      <td>0.293293</td>\n",
       "      <td>0.056765</td>\n",
       "      <td>0.103536</td>\n",
       "      <td>0.326491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.118076</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.099097</td>\n",
       "      <td>0.107680</td>\n",
       "      <td>0.034279</td>\n",
       "      <td>0.152789</td>\n",
       "      <td>0.086705</td>\n",
       "      <td>0.078864</td>\n",
       "      <td>0.068940</td>\n",
       "      <td>0.092399</td>\n",
       "      <td>...</td>\n",
       "      <td>0.086927</td>\n",
       "      <td>0.259636</td>\n",
       "      <td>0.289092</td>\n",
       "      <td>0.318824</td>\n",
       "      <td>0.149105</td>\n",
       "      <td>0.186347</td>\n",
       "      <td>0.168034</td>\n",
       "      <td>0.106748</td>\n",
       "      <td>0.136796</td>\n",
       "      <td>0.080358</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.029097</td>\n",
       "      <td>0.099097</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.252131</td>\n",
       "      <td>0.026893</td>\n",
       "      <td>0.062539</td>\n",
       "      <td>0.039767</td>\n",
       "      <td>0.089474</td>\n",
       "      <td>0.078162</td>\n",
       "      <td>0.037670</td>\n",
       "      <td>...</td>\n",
       "      <td>0.040918</td>\n",
       "      <td>0.019031</td>\n",
       "      <td>0.065417</td>\n",
       "      <td>0.055373</td>\n",
       "      <td>0.086503</td>\n",
       "      <td>0.018418</td>\n",
       "      <td>0.096993</td>\n",
       "      <td>0.109631</td>\n",
       "      <td>0.092574</td>\n",
       "      <td>0.018987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.011628</td>\n",
       "      <td>0.107680</td>\n",
       "      <td>0.252131</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.045543</td>\n",
       "      <td>0.078812</td>\n",
       "      <td>0.095354</td>\n",
       "      <td>0.059498</td>\n",
       "      <td>0.053879</td>\n",
       "      <td>...</td>\n",
       "      <td>0.024226</td>\n",
       "      <td>0.050703</td>\n",
       "      <td>0.056561</td>\n",
       "      <td>0.107294</td>\n",
       "      <td>0.098892</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.132900</td>\n",
       "      <td>0.142798</td>\n",
       "      <td>0.097066</td>\n",
       "      <td>0.015176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.264677</td>\n",
       "      <td>0.034279</td>\n",
       "      <td>0.026893</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.202843</td>\n",
       "      <td>0.299619</td>\n",
       "      <td>0.163724</td>\n",
       "      <td>0.038474</td>\n",
       "      <td>0.153021</td>\n",
       "      <td>...</td>\n",
       "      <td>0.262547</td>\n",
       "      <td>0.048524</td>\n",
       "      <td>0.048312</td>\n",
       "      <td>0.022202</td>\n",
       "      <td>0.091910</td>\n",
       "      <td>0.066000</td>\n",
       "      <td>0.156172</td>\n",
       "      <td>0.115842</td>\n",
       "      <td>0.124297</td>\n",
       "      <td>0.267574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.312419</td>\n",
       "      <td>0.152789</td>\n",
       "      <td>0.062539</td>\n",
       "      <td>0.045543</td>\n",
       "      <td>0.202843</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.375963</td>\n",
       "      <td>0.131795</td>\n",
       "      <td>0.110944</td>\n",
       "      <td>0.400758</td>\n",
       "      <td>...</td>\n",
       "      <td>0.287549</td>\n",
       "      <td>0.080312</td>\n",
       "      <td>0.162988</td>\n",
       "      <td>0.182856</td>\n",
       "      <td>0.114262</td>\n",
       "      <td>0.092090</td>\n",
       "      <td>0.261859</td>\n",
       "      <td>0.097606</td>\n",
       "      <td>0.206104</td>\n",
       "      <td>0.187637</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.308729</td>\n",
       "      <td>0.086705</td>\n",
       "      <td>0.039767</td>\n",
       "      <td>0.078812</td>\n",
       "      <td>0.299619</td>\n",
       "      <td>0.375963</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.211282</td>\n",
       "      <td>0.107795</td>\n",
       "      <td>0.328923</td>\n",
       "      <td>...</td>\n",
       "      <td>0.290002</td>\n",
       "      <td>0.074170</td>\n",
       "      <td>0.094619</td>\n",
       "      <td>0.084235</td>\n",
       "      <td>0.115620</td>\n",
       "      <td>0.100625</td>\n",
       "      <td>0.233843</td>\n",
       "      <td>0.039199</td>\n",
       "      <td>0.224227</td>\n",
       "      <td>0.296332</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.224269</td>\n",
       "      <td>0.078864</td>\n",
       "      <td>0.089474</td>\n",
       "      <td>0.095354</td>\n",
       "      <td>0.163724</td>\n",
       "      <td>0.131795</td>\n",
       "      <td>0.211282</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.037040</td>\n",
       "      <td>0.183375</td>\n",
       "      <td>...</td>\n",
       "      <td>0.165008</td>\n",
       "      <td>0.066843</td>\n",
       "      <td>0.058766</td>\n",
       "      <td>0.068759</td>\n",
       "      <td>0.087159</td>\n",
       "      <td>0.129381</td>\n",
       "      <td>0.188662</td>\n",
       "      <td>0.121223</td>\n",
       "      <td>0.083910</td>\n",
       "      <td>0.273238</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.026017</td>\n",
       "      <td>0.068940</td>\n",
       "      <td>0.078162</td>\n",
       "      <td>0.059498</td>\n",
       "      <td>0.038474</td>\n",
       "      <td>0.110944</td>\n",
       "      <td>0.107795</td>\n",
       "      <td>0.037040</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.155435</td>\n",
       "      <td>...</td>\n",
       "      <td>0.011708</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.101710</td>\n",
       "      <td>0.034568</td>\n",
       "      <td>0.045002</td>\n",
       "      <td>0.052699</td>\n",
       "      <td>0.107486</td>\n",
       "      <td>0.055766</td>\n",
       "      <td>0.070065</td>\n",
       "      <td>0.088281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.286411</td>\n",
       "      <td>0.092399</td>\n",
       "      <td>0.037670</td>\n",
       "      <td>0.053879</td>\n",
       "      <td>0.153021</td>\n",
       "      <td>0.400758</td>\n",
       "      <td>0.328923</td>\n",
       "      <td>0.183375</td>\n",
       "      <td>0.155435</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.278558</td>\n",
       "      <td>0.049310</td>\n",
       "      <td>0.153506</td>\n",
       "      <td>0.065471</td>\n",
       "      <td>0.060088</td>\n",
       "      <td>0.033686</td>\n",
       "      <td>0.197107</td>\n",
       "      <td>0.085402</td>\n",
       "      <td>0.118945</td>\n",
       "      <td>0.162538</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 943 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "user_id       1         2         3         4         5         6         7    \\\n",
       "user_id                                                                         \n",
       "1        1.000000  0.118076  0.029097  0.011628  0.264677  0.312419  0.308729   \n",
       "2        0.118076  1.000000  0.099097  0.107680  0.034279  0.152789  0.086705   \n",
       "3        0.029097  0.099097  1.000000  0.252131  0.026893  0.062539  0.039767   \n",
       "4        0.011628  0.107680  0.252131  1.000000  0.000000  0.045543  0.078812   \n",
       "5        0.264677  0.034279  0.026893  0.000000  1.000000  0.202843  0.299619   \n",
       "6        0.312419  0.152789  0.062539  0.045543  0.202843  1.000000  0.375963   \n",
       "7        0.308729  0.086705  0.039767  0.078812  0.299619  0.375963  1.000000   \n",
       "8        0.224269  0.078864  0.089474  0.095354  0.163724  0.131795  0.211282   \n",
       "9        0.026017  0.068940  0.078162  0.059498  0.038474  0.110944  0.107795   \n",
       "10       0.286411  0.092399  0.037670  0.053879  0.153021  0.400758  0.328923   \n",
       "\n",
       "user_id       8         9         10     ...          934       935       936  \\\n",
       "user_id                                  ...                                    \n",
       "1        0.224269  0.026017  0.286411    ...     0.308475  0.055872  0.197862   \n",
       "2        0.078864  0.068940  0.092399    ...     0.086927  0.259636  0.289092   \n",
       "3        0.089474  0.078162  0.037670    ...     0.040918  0.019031  0.065417   \n",
       "4        0.095354  0.059498  0.053879    ...     0.024226  0.050703  0.056561   \n",
       "5        0.163724  0.038474  0.153021    ...     0.262547  0.048524  0.048312   \n",
       "6        0.131795  0.110944  0.400758    ...     0.287549  0.080312  0.162988   \n",
       "7        0.211282  0.107795  0.328923    ...     0.290002  0.074170  0.094619   \n",
       "8        1.000000  0.037040  0.183375    ...     0.165008  0.066843  0.058766   \n",
       "9        0.037040  1.000000  0.155435    ...     0.011708  0.000000  0.101710   \n",
       "10       0.183375  0.155435  1.000000    ...     0.278558  0.049310  0.153506   \n",
       "\n",
       "user_id       937       938       939       940       941       942       943  \n",
       "user_id                                                                        \n",
       "1        0.131367  0.152449  0.084456  0.293293  0.056765  0.103536  0.326491  \n",
       "2        0.318824  0.149105  0.186347  0.168034  0.106748  0.136796  0.080358  \n",
       "3        0.055373  0.086503  0.018418  0.096993  0.109631  0.092574  0.018987  \n",
       "4        0.107294  0.098892  0.000000  0.132900  0.142798  0.097066  0.015176  \n",
       "5        0.022202  0.091910  0.066000  0.156172  0.115842  0.124297  0.267574  \n",
       "6        0.182856  0.114262  0.092090  0.261859  0.097606  0.206104  0.187637  \n",
       "7        0.084235  0.115620  0.100625  0.233843  0.039199  0.224227  0.296332  \n",
       "8        0.068759  0.087159  0.129381  0.188662  0.121223  0.083910  0.273238  \n",
       "9        0.034568  0.045002  0.052699  0.107486  0.055766  0.070065  0.088281  \n",
       "10       0.065471  0.060088  0.033686  0.197107  0.085402  0.118945  0.162538  \n",
       "\n",
       "[10 rows x 943 columns]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Convert into pandas dataframe \n",
    "cosine_sim = pd.DataFrame(cosine_sim, index=r_matrix.index, columns=r_matrix.index)\n",
    "\n",
    "cosine_sim.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#User Based Collaborative Filter using Weighted Mean Ratings\n",
    "def cf_user_wmean(user_id, movie_id):\n",
    "    \n",
    "    #Check if movie_id exists in r_matrix\n",
    "    if movie_id in r_matrix:\n",
    "        \n",
    "        #Get the similarity scores for the user in question with every other user\n",
    "        sim_scores = cosine_sim[user_id]\n",
    "        \n",
    "        #Get the user ratings for the movie in question\n",
    "        m_ratings = r_matrix[movie_id]\n",
    "        \n",
    "        #Extract the indices containing NaN in the m_ratings series\n",
    "        idx = m_ratings[m_ratings.isnull()].index\n",
    "        \n",
    "        #Drop the NaN values from the m_ratings Series\n",
    "        m_ratings = m_ratings.dropna()\n",
    "        \n",
    "        #Drop the corresponding cosine scores from the sim_scores series\n",
    "        sim_scores = sim_scores.drop(idx)\n",
    "        \n",
    "        #Compute the final weighted mean\n",
    "        wmean_rating = np.dot(sim_scores, m_ratings)/ sim_scores.sum()\n",
    "    \n",
    "    else:\n",
    "        #Default to a rating of 3.0 in the absence of any information\n",
    "        wmean_rating = 3.0\n",
    "    \n",
    "    return wmean_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0174483808407588"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score(cf_user_wmean)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Demographics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>rating</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>889</td>\n",
       "      <td>684</td>\n",
       "      <td>2</td>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>78704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>889</td>\n",
       "      <td>279</td>\n",
       "      <td>2</td>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>78704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>889</td>\n",
       "      <td>29</td>\n",
       "      <td>3</td>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>78704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>889</td>\n",
       "      <td>190</td>\n",
       "      <td>3</td>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>78704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>889</td>\n",
       "      <td>232</td>\n",
       "      <td>3</td>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>78704</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  movie_id  rating  age sex  occupation zip_code\n",
       "0      889       684       2   24   M  technician    78704\n",
       "1      889       279       2   24   M  technician    78704\n",
       "2      889        29       3   24   M  technician    78704\n",
       "3      889       190       3   24   M  technician    78704\n",
       "4      889       232       3   24   M  technician    78704"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Merge the original users dataframe with the training set \n",
    "merged_df = pd.merge(X_train, users)\n",
    "\n",
    "merged_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sex\n",
       "F    3.827586\n",
       "M    3.918919\n",
       "Name: rating, dtype: float64"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Compute the mean rating of every movie by gender\n",
    "gender_mean = merged_df[['movie_id', 'sex', 'rating']].groupby(['movie_id', 'sex'])['rating'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Set the index of the users dataframe to the user_id\n",
    "users = users.set_index('user_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Gender Based Collaborative Filter using Mean Ratings\n",
    "def cf_gender(user_id, movie_id):\n",
    "    \n",
    "    #Check if movie_id exists in r_matrix (or training set)\n",
    "    if movie_id in r_matrix:\n",
    "        #Identify the gender of the user\n",
    "        gender = users.loc[user_id]['sex']\n",
    "        \n",
    "        #Check if the gender has rated the movie\n",
    "        if gender in gender_mean[movie_id]:\n",
    "            \n",
    "            #Compute the mean rating given by that gender to the movie\n",
    "            gender_rating = gender_mean[movie_id][gender]\n",
    "        \n",
    "        else:\n",
    "            gender_rating = 3.0\n",
    "    \n",
    "    else:\n",
    "        #Default to a rating of 3.0 in the absence of any information\n",
    "        gender_rating = 3.0\n",
    "    \n",
    "    return gender_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/site-packages/pandas/core/indexes/multi.py:819: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
      "  return self._engine.get_value(s, k)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1.0330308800874282"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score(cf_gender)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>occupation</th>\n",
       "      <th colspan=\"2\" halign=\"left\">administrator</th>\n",
       "      <th colspan=\"2\" halign=\"left\">artist</th>\n",
       "      <th>doctor</th>\n",
       "      <th colspan=\"2\" halign=\"left\">educator</th>\n",
       "      <th colspan=\"2\" halign=\"left\">engineer</th>\n",
       "      <th>entertainment</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">salesman</th>\n",
       "      <th colspan=\"2\" halign=\"left\">scientist</th>\n",
       "      <th colspan=\"2\" halign=\"left\">student</th>\n",
       "      <th colspan=\"2\" halign=\"left\">technician</th>\n",
       "      <th colspan=\"2\" halign=\"left\">writer</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>...</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>movie_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.222222</td>\n",
       "      <td>4.25</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>3.666667</td>\n",
       "      <td>3.50</td>\n",
       "      <td>3.923077</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.970588</td>\n",
       "      <td>5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.5</td>\n",
       "      <td>3.888889</td>\n",
       "      <td>3.833333</td>\n",
       "      <td>3.709091</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.200000</td>\n",
       "      <td>4.166667</td>\n",
       "      <td>3.142857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.750000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.250000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.363636</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.333333</td>\n",
       "      <td>3.333333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.714286</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>2.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.5</td>\n",
       "      <td>2.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.00</td>\n",
       "      <td>2.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.625000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.217391</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.888889</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.666667</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.75</td>\n",
       "      <td>3.636364</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.555556</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.666667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.600000</td>\n",
       "      <td>3.285714</td>\n",
       "      <td>3.724138</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.200000</td>\n",
       "      <td>4.250000</td>\n",
       "      <td>3.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>4.0</td>\n",
       "      <td>2.333333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.00</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.666667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>4.333333</td>\n",
       "      <td>3.272727</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.333333</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 41 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "occupation administrator           artist              doctor educator  \\\n",
       "sex                    F         M      F         M         M        F   \n",
       "movie_id                                                                 \n",
       "1                    4.0  4.222222   4.25  3.500000  3.666667     3.50   \n",
       "2                    3.0  3.750000    NaN       NaN       NaN      NaN   \n",
       "3                    3.5  2.500000    NaN       NaN       NaN     4.00   \n",
       "4                    3.0  3.888889    NaN  4.666667  3.000000     2.75   \n",
       "5                    4.0  2.333333    NaN       NaN       NaN     4.00   \n",
       "\n",
       "occupation           engineer           entertainment    ...    salesman  \\\n",
       "sex                M        F         M             F    ...           F   \n",
       "movie_id                                                 ...               \n",
       "1           3.923077      4.0  3.970588           5.0    ...         4.0   \n",
       "2           3.250000      NaN  3.363636           NaN    ...         NaN   \n",
       "3           2.500000      NaN  3.625000           NaN    ...         NaN   \n",
       "4           3.636364      NaN  3.555556           NaN    ...         4.0   \n",
       "5           1.500000      NaN  2.666667           NaN    ...         NaN   \n",
       "\n",
       "occupation           scientist             student           technician  \\\n",
       "sex                M         F         M         F         M          F   \n",
       "movie_id                                                                  \n",
       "1           4.000000       3.5  3.888889  3.833333  3.709091        4.0   \n",
       "2                NaN       NaN       NaN  2.333333  3.333333        NaN   \n",
       "3           1.000000       NaN       NaN  2.000000  3.217391        NaN   \n",
       "4           3.666667       NaN  3.600000  3.285714  3.724138        NaN   \n",
       "5                NaN       NaN  3.500000  4.333333  3.272727        NaN   \n",
       "\n",
       "occupation              writer            \n",
       "sex                M         F         M  \n",
       "movie_id                                  \n",
       "1           4.200000  4.166667  3.142857  \n",
       "2           2.714286  5.000000  2.666667  \n",
       "3           4.000000       NaN  1.000000  \n",
       "4           3.200000  4.250000  3.500000  \n",
       "5           3.333333  4.000000  2.666667  \n",
       "\n",
       "[5 rows x 41 columns]"
      ]
     },
     "execution_count": 174,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Compute the mean rating by gender and occupation\n",
    "gen_occ_mean = merged_df[['sex', 'rating', 'movie_id', 'occupation']].pivot_table(\n",
    "    values='rating', index='movie_id', columns=['occupation', 'sex'], aggfunc='mean')\n",
    "\n",
    "gen_occ_mean.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Gender and Occupation Based Collaborative Filter using Mean Ratings\n",
    "def cf_gen_occ(user_id, movie_id):\n",
    "    \n",
    "    #Check if movie_id exists in gen_occ_mean\n",
    "    if movie_id in gen_occ_mean.index:\n",
    "        \n",
    "        #Identify the user\n",
    "        user = users.loc[user_id]\n",
    "        \n",
    "        #Identify the gender and occupation\n",
    "        gender = user['sex']\n",
    "        occ = user['occupation']\n",
    "        \n",
    "        #Check if the occupation has rated the movie\n",
    "        if occ in gen_occ_mean.loc[movie_id]:\n",
    "            \n",
    "            #Check if the gender has rated the movie\n",
    "            if gender in gen_occ_mean.loc[movie_id][occ]:\n",
    "                \n",
    "                #Extract the required rating\n",
    "                rating = gen_occ_mean.loc[movie_id][occ][gender]\n",
    "                \n",
    "                #Default to 3.0 if the rating is null\n",
    "                if np.isnan(rating):\n",
    "                    rating = 3.0\n",
    "                \n",
    "                return rating\n",
    "            \n",
    "    #Return the default rating    \n",
    "    return 3.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.1391976012043645"
      ]
     },
     "execution_count": 199,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score(cf_gen_occ)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Based Approaches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating RMSE of algorithm KNNBasic.\n",
      "\n",
      "------------\n",
      "Fold 1\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9776\n",
      "------------\n",
      "Fold 2\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9789\n",
      "------------\n",
      "Fold 3\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9695\n",
      "------------\n",
      "Fold 4\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9810\n",
      "------------\n",
      "Fold 5\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9849\n",
      "------------\n",
      "------------\n",
      "Mean RMSE: 0.9784\n",
      "------------\n",
      "------------\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CaseInsensitiveDefaultDict(list,\n",
       "                           {'rmse': [0.97764007686097709,\n",
       "                             0.97889035204999741,\n",
       "                             0.9694859699934969,\n",
       "                             0.98099811511904433,\n",
       "                             0.98488926832497381]})"
      ]
     },
     "execution_count": 231,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Import the required classes and methods from the surprise library\n",
    "from surprise import Reader, Dataset, KNNBasic, evaluate\n",
    "\n",
    "#Define a Reader object\n",
    "#The Reader object helps in parsing the file or dataframe containing ratings\n",
    "reader = Reader()\n",
    "\n",
    "#Create the dataset to be used for building the filter\n",
    "data = Dataset.load_from_df(ratings, reader)\n",
    "\n",
    "#Define the algorithm object; in this case kNN\n",
    "knn = KNNBasic()\n",
    "\n",
    "#Evaluate the performance in terms of RMSE\n",
    "evaluate(knn, data, measures=['RMSE'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating RMSE of algorithm SVD.\n",
      "\n",
      "------------\n",
      "Fold 1\n",
      "RMSE: 0.9371\n",
      "------------\n",
      "Fold 2\n",
      "RMSE: 0.9417\n",
      "------------\n",
      "Fold 3\n",
      "RMSE: 0.9289\n",
      "------------\n",
      "Fold 4\n",
      "RMSE: 0.9379\n",
      "------------\n",
      "Fold 5\n",
      "RMSE: 0.9379\n",
      "------------\n",
      "------------\n",
      "Mean RMSE: 0.9367\n",
      "------------\n",
      "------------\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CaseInsensitiveDefaultDict(list,\n",
       "                           {'rmse': [0.93714337825960081,\n",
       "                             0.9417378198331483,\n",
       "                             0.92893737314257874,\n",
       "                             0.93793761103739881,\n",
       "                             0.93789928866069328]})"
      ]
     },
     "execution_count": 232,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Import SVD\n",
    "from surprise import SVD\n",
    "\n",
    "#Define the SVD algorithm object\n",
    "svd = SVD()\n",
    "\n",
    "#Evaluate the performance in terms of RMSE\n",
    "evaluate(svd, data, measures=['RMSE'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}