||
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'1.4.3'"
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import pandas as pd\n",
- "pd.__version__"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>adult</th>\n",
- " <th>belongs_to_collection</th>\n",
- " <th>budget</th>\n",
- " <th>genres</th>\n",
- " <th>homepage</th>\n",
- " <th>id</th>\n",
- " <th>imdb_id</th>\n",
- " <th>original_language</th>\n",
- " <th>original_title</th>\n",
- " <th>overview</th>\n",
- " <th>...</th>\n",
- " <th>release_date</th>\n",
- " <th>revenue</th>\n",
- " <th>runtime</th>\n",
- " <th>spoken_languages</th>\n",
- " <th>status</th>\n",
- " <th>tagline</th>\n",
- " <th>title</th>\n",
- " <th>video</th>\n",
- " <th>vote_average</th>\n",
- " <th>vote_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>False</td>\n",
- " <td>{'id': 10194, 'name': 'Toy Story Collection', ...</td>\n",
- " <td>30000000</td>\n",
- " <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
- " <td>http://toystory.disney.com/toy-story</td>\n",
- " <td>862</td>\n",
- " <td>tt0114709</td>\n",
- " <td>en</td>\n",
- " <td>Toy Story</td>\n",
- " <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
- " <td>...</td>\n",
- " <td>1995-10-30</td>\n",
- " <td>373554033.0</td>\n",
- " <td>81.0</td>\n",
- " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
- " <td>Released</td>\n",
- " <td>NaN</td>\n",
- " <td>Toy Story</td>\n",
- " <td>False</td>\n",
- " <td>7.7</td>\n",
- " <td>5415.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>False</td>\n",
- " <td>NaN</td>\n",
- " <td>65000000</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
- " <td>NaN</td>\n",
- " <td>8844</td>\n",
- " <td>tt0113497</td>\n",
- " <td>en</td>\n",
- " <td>Jumanji</td>\n",
- " <td>When siblings Judy and Peter discover an encha...</td>\n",
- " <td>...</td>\n",
- " <td>1995-12-15</td>\n",
- " <td>262797249.0</td>\n",
- " <td>104.0</td>\n",
- " <td>[{'iso_639_1': 'en', 'name': 'English'}, {'iso...</td>\n",
- " <td>Released</td>\n",
- " <td>Roll the dice and unleash the excitement!</td>\n",
- " <td>Jumanji</td>\n",
- " <td>False</td>\n",
- " <td>6.9</td>\n",
- " <td>2413.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>False</td>\n",
- " <td>{'id': 119050, 'name': 'Grumpy Old Men Collect...</td>\n",
- " <td>0</td>\n",
- " <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
- " <td>NaN</td>\n",
- " <td>15602</td>\n",
- " <td>tt0113228</td>\n",
- " <td>en</td>\n",
- " <td>Grumpier Old Men</td>\n",
- " <td>A family wedding reignites the ancient feud be...</td>\n",
- " <td>...</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>0.0</td>\n",
- " <td>101.0</td>\n",
- " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
- " <td>Released</td>\n",
- " <td>Still Yelling. Still Fighting. Still Ready for...</td>\n",
- " <td>Grumpier Old Men</td>\n",
- " <td>False</td>\n",
- " <td>6.5</td>\n",
- " <td>92.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>False</td>\n",
- " <td>NaN</td>\n",
- " <td>16000000</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
- " <td>NaN</td>\n",
- " <td>31357</td>\n",
- " <td>tt0114885</td>\n",
- " <td>en</td>\n",
- " <td>Waiting to Exhale</td>\n",
- " <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
- " <td>...</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>81452156.0</td>\n",
- " <td>127.0</td>\n",
- " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
- " <td>Released</td>\n",
- " <td>Friends are the people who let you be yourself...</td>\n",
- " <td>Waiting to Exhale</td>\n",
- " <td>False</td>\n",
- " <td>6.1</td>\n",
- " <td>34.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>False</td>\n",
- " <td>{'id': 96871, 'name': 'Father of the Bride Col...</td>\n",
- " <td>0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
- " <td>NaN</td>\n",
- " <td>11862</td>\n",
- " <td>tt0113041</td>\n",
- " <td>en</td>\n",
- " <td>Father of the Bride Part II</td>\n",
- " <td>Just when George Banks has recovered from his ...</td>\n",
- " <td>...</td>\n",
- " <td>1995-02-10</td>\n",
- " <td>76578911.0</td>\n",
- " <td>106.0</td>\n",
- " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
- " <td>Released</td>\n",
- " <td>Just When His World Is Back To Normal... He's ...</td>\n",
- " <td>Father of the Bride Part II</td>\n",
- " <td>False</td>\n",
- " <td>5.7</td>\n",
- " <td>173.0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>5 rows × 24 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " adult belongs_to_collection budget \\\n",
- "0 False {'id': 10194, 'name': 'Toy Story Collection', ... 30000000 \n",
- "1 False NaN 65000000 \n",
- "2 False {'id': 119050, 'name': 'Grumpy Old Men Collect... 0 \n",
- "3 False NaN 16000000 \n",
- "4 False {'id': 96871, 'name': 'Father of the Bride Col... 0 \n",
- "\n",
- " genres \\\n",
- "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
- "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
- "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
- "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
- "4 [{'id': 35, 'name': 'Comedy'}] \n",
- "\n",
- " homepage id imdb_id original_language \\\n",
- "0 http://toystory.disney.com/toy-story 862 tt0114709 en \n",
- "1 NaN 8844 tt0113497 en \n",
- "2 NaN 15602 tt0113228 en \n",
- "3 NaN 31357 tt0114885 en \n",
- "4 NaN 11862 tt0113041 en \n",
- "\n",
- " original_title \\\n",
- "0 Toy Story \n",
- "1 Jumanji \n",
- "2 Grumpier Old Men \n",
- "3 Waiting to Exhale \n",
- "4 Father of the Bride Part II \n",
- "\n",
- " overview ... release_date \\\n",
- "0 Led by Woody, Andy's toys live happily in his ... ... 1995-10-30 \n",
- "1 When siblings Judy and Peter discover an encha... ... 1995-12-15 \n",
- "2 A family wedding reignites the ancient feud be... ... 1995-12-22 \n",
- "3 Cheated on, mistreated and stepped on, the wom... ... 1995-12-22 \n",
- "4 Just when George Banks has recovered from his ... ... 1995-02-10 \n",
- "\n",
- " revenue runtime spoken_languages \\\n",
- "0 373554033.0 81.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
- "1 262797249.0 104.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n",
- "2 0.0 101.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
- "3 81452156.0 127.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
- "4 76578911.0 106.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
- "\n",
- " status tagline \\\n",
- "0 Released NaN \n",
- "1 Released Roll the dice and unleash the excitement! \n",
- "2 Released Still Yelling. Still Fighting. Still Ready for... \n",
- "3 Released Friends are the people who let you be yourself... \n",
- "4 Released Just When His World Is Back To Normal... He's ... \n",
- "\n",
- " title video vote_average vote_count \n",
- "0 Toy Story False 7.7 5415.0 \n",
- "1 Jumanji False 6.9 2413.0 \n",
- "2 Grumpier Old Men False 6.5 92.0 \n",
- "3 Waiting to Exhale False 6.1 34.0 \n",
- "4 Father of the Bride Part II False 5.7 173.0 \n",
- "\n",
- "[5 rows x 24 columns]"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Read the CSV file into df\n",
- "df = pd.read_csv('../data/movies_metadata.csv', low_memory=False)\n",
- "\n",
- "#Preview the first five rows of df (head() is explained a little later)\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "pandas.core.frame.DataFrame"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Output the type of df\n",
- "type(df)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(45466, 24)"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Output the shape of df\n",
- "df.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',\n",
- " 'imdb_id', 'original_language', 'original_title', 'overview',\n",
- " 'popularity', 'poster_path', 'production_companies',\n",
- " 'production_countries', 'release_date', 'revenue', 'runtime',\n",
- " 'spoken_languages', 'status', 'tagline', 'title', 'video',\n",
- " 'vote_average', 'vote_count'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Output the columns of df\n",
- "df.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "adult False\n",
- "belongs_to_collection NaN\n",
- "budget 65000000\n",
- "genres [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...\n",
- "homepage NaN\n",
- "id 8844\n",
- "imdb_id tt0113497\n",
- "original_language en\n",
- "original_title Jumanji\n",
- "overview When siblings Judy and Peter discover an encha...\n",
- "popularity 17.015539\n",
- "poster_path /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg\n",
- "production_companies [{'name': 'TriStar Pictures', 'id': 559}, {'na...\n",
- "production_countries [{'iso_3166_1': 'US', 'name': 'United States o...\n",
- "release_date 1995-12-15\n",
- "revenue 262797249.0\n",
- "runtime 104.0\n",
- "spoken_languages [{'iso_639_1': 'en', 'name': 'English'}, {'iso...\n",
- "status Released\n",
- "tagline Roll the dice and unleash the excitement!\n",
- "title Jumanji\n",
- "video False\n",
- "vote_average 6.9\n",
- "vote_count 2413.0\n",
- "Name: 1, dtype: object"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Select the second movie in df\n",
- "second = df.iloc[1]\n",
- "second"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "adult False\n",
- "belongs_to_collection NaN\n",
- "budget 65000000\n",
- "genres [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...\n",
- "homepage NaN\n",
- "id 8844\n",
- "imdb_id tt0113497\n",
- "original_language en\n",
- "original_title Jumanji\n",
- "overview When siblings Judy and Peter discover an encha...\n",
- "popularity 17.015539\n",
- "poster_path /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg\n",
- "production_companies [{'name': 'TriStar Pictures', 'id': 559}, {'na...\n",
- "production_countries [{'iso_3166_1': 'US', 'name': 'United States o...\n",
- "release_date 1995-12-15\n",
- "revenue 262797249.0\n",
- "runtime 104.0\n",
- "spoken_languages [{'iso_639_1': 'en', 'name': 'English'}, {'iso...\n",
- "status Released\n",
- "tagline Roll the dice and unleash the excitement!\n",
- "video False\n",
- "vote_average 6.9\n",
- "vote_count 2413.0\n",
- "Name: Jumanji, dtype: object"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Change the index to the title\n",
- "df = df.set_index('title')\n",
- "\n",
- "#Access the movie with title 'Jumanji'\n",
- "jum = df.loc['Jumanji']\n",
- "jum"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = df.reset_index()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>title</th>\n",
- " <th>release_date</th>\n",
- " <th>budget</th>\n",
- " <th>revenue</th>\n",
- " <th>runtime</th>\n",
- " <th>genres</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>Toy Story</td>\n",
- " <td>1995-10-30</td>\n",
- " <td>30000000</td>\n",
- " <td>373554033.0</td>\n",
- " <td>81.0</td>\n",
- " <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>Jumanji</td>\n",
- " <td>1995-12-15</td>\n",
- " <td>65000000</td>\n",
- " <td>262797249.0</td>\n",
- " <td>104.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>Grumpier Old Men</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>0</td>\n",
- " <td>0.0</td>\n",
- " <td>101.0</td>\n",
- " <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>Waiting to Exhale</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>16000000</td>\n",
- " <td>81452156.0</td>\n",
- " <td>127.0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>Father of the Bride Part II</td>\n",
- " <td>1995-02-10</td>\n",
- " <td>0</td>\n",
- " <td>76578911.0</td>\n",
- " <td>106.0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " title release_date budget revenue runtime \\\n",
- "0 Toy Story 1995-10-30 30000000 373554033.0 81.0 \n",
- "1 Jumanji 1995-12-15 65000000 262797249.0 104.0 \n",
- "2 Grumpier Old Men 1995-12-22 0 0.0 101.0 \n",
- "3 Waiting to Exhale 1995-12-22 16000000 81452156.0 127.0 \n",
- "4 Father of the Bride Part II 1995-02-10 0 76578911.0 106.0 \n",
- "\n",
- " genres \n",
- "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
- "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
- "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
- "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
- "4 [{'id': 35, 'name': 'Comedy'}] "
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Create a smaller dataframe with a subset of all features.\n",
- "#Take an explicit copy so that later column assignments modify small_df itself\n",
- "#rather than a slice of df (which triggers SettingWithCopyWarning).\n",
- "small_df = df[['title', 'release_date', 'budget', 'revenue', 'runtime', 'genres']].copy()\n",
- "\n",
- "#Output only the first 5 rows of small_df\n",
- "small_df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>title</th>\n",
- " <th>release_date</th>\n",
- " <th>budget</th>\n",
- " <th>revenue</th>\n",
- " <th>runtime</th>\n",
- " <th>genres</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>Toy Story</td>\n",
- " <td>1995-10-30</td>\n",
- " <td>30000000</td>\n",
- " <td>373554033.0</td>\n",
- " <td>81.0</td>\n",
- " <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>Jumanji</td>\n",
- " <td>1995-12-15</td>\n",
- " <td>65000000</td>\n",
- " <td>262797249.0</td>\n",
- " <td>104.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>Grumpier Old Men</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>0</td>\n",
- " <td>0.0</td>\n",
- " <td>101.0</td>\n",
- " <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>Waiting to Exhale</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>16000000</td>\n",
- " <td>81452156.0</td>\n",
- " <td>127.0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>Father of the Bride Part II</td>\n",
- " <td>1995-02-10</td>\n",
- " <td>0</td>\n",
- " <td>76578911.0</td>\n",
- " <td>106.0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>Heat</td>\n",
- " <td>1995-12-15</td>\n",
- " <td>60000000</td>\n",
- " <td>187436818.0</td>\n",
- " <td>170.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>Sabrina</td>\n",
- " <td>1995-12-15</td>\n",
- " <td>58000000</td>\n",
- " <td>0.0</td>\n",
- " <td>127.0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7</th>\n",
- " <td>Tom and Huck</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>0</td>\n",
- " <td>0.0</td>\n",
- " <td>97.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>Sudden Death</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>35000000</td>\n",
- " <td>64350171.0</td>\n",
- " <td>106.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>GoldenEye</td>\n",
- " <td>1995-11-16</td>\n",
- " <td>58000000</td>\n",
- " <td>352194034.0</td>\n",
- " <td>130.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td>The American President</td>\n",
- " <td>1995-11-17</td>\n",
- " <td>62000000</td>\n",
- " <td>107879496.0</td>\n",
- " <td>106.0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td>Dracula: Dead and Loving It</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>0</td>\n",
- " <td>0.0</td>\n",
- " <td>88.0</td>\n",
- " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 27, 'nam...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td>Balto</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>0</td>\n",
- " <td>11348324.0</td>\n",
- " <td>78.0</td>\n",
- " <td>[{'id': 10751, 'name': 'Family'}, {'id': 16, '...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13</th>\n",
- " <td>Nixon</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>44000000</td>\n",
- " <td>13681765.0</td>\n",
- " <td>192.0</td>\n",
- " <td>[{'id': 36, 'name': 'History'}, {'id': 18, 'na...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14</th>\n",
- " <td>Cutthroat Island</td>\n",
- " <td>1995-12-22</td>\n",
- " <td>98000000</td>\n",
- " <td>10017322.0</td>\n",
- " <td>119.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " title release_date budget revenue runtime \\\n",
- "0 Toy Story 1995-10-30 30000000 373554033.0 81.0 \n",
- "1 Jumanji 1995-12-15 65000000 262797249.0 104.0 \n",
- "2 Grumpier Old Men 1995-12-22 0 0.0 101.0 \n",
- "3 Waiting to Exhale 1995-12-22 16000000 81452156.0 127.0 \n",
- "4 Father of the Bride Part II 1995-02-10 0 76578911.0 106.0 \n",
- "5 Heat 1995-12-15 60000000 187436818.0 170.0 \n",
- "6 Sabrina 1995-12-15 58000000 0.0 127.0 \n",
- "7 Tom and Huck 1995-12-22 0 0.0 97.0 \n",
- "8 Sudden Death 1995-12-22 35000000 64350171.0 106.0 \n",
- "9 GoldenEye 1995-11-16 58000000 352194034.0 130.0 \n",
- "10 The American President 1995-11-17 62000000 107879496.0 106.0 \n",
- "11 Dracula: Dead and Loving It 1995-12-22 0 0.0 88.0 \n",
- "12 Balto 1995-12-22 0 11348324.0 78.0 \n",
- "13 Nixon 1995-12-22 44000000 13681765.0 192.0 \n",
- "14 Cutthroat Island 1995-12-22 98000000 10017322.0 119.0 \n",
- "\n",
- " genres \n",
- "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
- "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
- "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
- "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
- "4 [{'id': 35, 'name': 'Comedy'}] \n",
- "5 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... \n",
- "6 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '... \n",
- "7 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n",
- "8 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n",
- "9 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n",
- "10 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
- "11 [{'id': 35, 'name': 'Comedy'}, {'id': 27, 'nam... \n",
- "12 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... \n",
- "13 [{'id': 36, 'name': 'History'}, {'id': 18, 'na... \n",
- "14 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... "
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Display the first 15 rows\n",
- "small_df.head(15)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<class 'pandas.core.frame.DataFrame'>\n",
- "RangeIndex: 45466 entries, 0 to 45465\n",
- "Data columns (total 6 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 title 45460 non-null object \n",
- " 1 release_date 45379 non-null object \n",
- " 2 budget 45466 non-null object \n",
- " 3 revenue 45460 non-null float64\n",
- " 4 runtime 45203 non-null float64\n",
- " 5 genres 45466 non-null object \n",
- "dtypes: float64(2), object(4)\n",
- "memory usage: 2.1+ MB\n"
- ]
- }
- ],
- "source": [
- "#Get information about the data type of each feature\n",
- "small_df.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "ename": "ValueError",
- "evalue": "could not convert string to float: '/ff9qCepilowshEtG2GYWwzt2bs4.jpg'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "Input \u001b[0;32mIn [13]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbudget\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfloat\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/generic.py:5912\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 5905\u001b[0m results \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 5906\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[:, i]\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 5907\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[1;32m 5908\u001b[0m ]\n\u001b[1;32m 5910\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 5911\u001b[0m \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[0;32m-> 5912\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5913\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor(new_data)\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5915\u001b[0m \u001b[38;5;66;03m# GH 33113: handle empty frame or series\u001b[39;00m\n",
- "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/managers.py:419\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mastype\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, dtype, copy: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, errors: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[0;32m--> 419\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/managers.py:304\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[0;34m(self, f, align_keys, ignore_failures, **kwargs)\u001b[0m\n\u001b[1;32m 302\u001b[0m applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 304\u001b[0m applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mNotImplementedError\u001b[39;00m):\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ignore_failures:\n",
- "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/blocks.py:580\u001b[0m, in \u001b[0;36mBlock.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 563\u001b[0m \u001b[38;5;124;03mCoerce to the new dtype.\u001b[39;00m\n\u001b[1;32m 564\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 576\u001b[0m \u001b[38;5;124;03mBlock\u001b[39;00m\n\u001b[1;32m 577\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 578\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m--> 580\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 582\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[1;32m 583\u001b[0m newb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmake_block(new_values)\n",
- "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1292\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 1289\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtype\u001b[38;5;241m.\u001b[39mnumpy_dtype\n\u001b[1;32m 1291\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1292\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# e.g. astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# trying to convert to float\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
- "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1237\u001b[0m, in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 1234\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1237\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n",
- "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1181\u001b[0m, in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 1177\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 1179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;129;01mor\u001b[39;00m is_object_dtype(arr\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mor\u001b[39;00m is_object_dtype(dtype):\n\u001b[1;32m 1180\u001b[0m \u001b[38;5;66;03m# Explicit copy, or required since NumPy can't view from / to object.\u001b[39;00m\n\u001b[0;32m-> 1181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n",
- "\u001b[0;31mValueError\u001b[0m: could not convert string to float: '/ff9qCepilowshEtG2GYWwzt2bs4.jpg'"
- ]
- }
- ],
- "source": [
- "df['budget'].astype('float')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<class 'pandas.core.frame.DataFrame'>\n",
- "RangeIndex: 45466 entries, 0 to 45465\n",
- "Data columns (total 6 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 title 45460 non-null object \n",
- " 1 release_date 45379 non-null object \n",
- " 2 budget 45463 non-null float64\n",
- " 3 revenue 45460 non-null float64\n",
- " 4 runtime 45203 non-null float64\n",
- " 5 genres 45466 non-null object \n",
- "dtypes: float64(3), object(3)\n",
- "memory usage: 2.1+ MB\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/1765380320.py:13: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " small_df['budget'] = small_df['budget'].apply(to_float)\n",
- "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/1765380320.py:16: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " small_df['budget'] = small_df['budget'].astype('float')\n"
- ]
- }
- ],
- "source": [
- "#Import the numpy library (np.nan is used as the missing-value marker)\n",
- "import numpy as np\n",
- "\n",
- "#Function to convert to float manually\n",
- "def to_float(x):\n",
- "    \"\"\"Return x converted to float, or np.nan when x cannot be converted.\"\"\"\n",
- "    try:\n",
- "        return float(x)\n",
- "    except (TypeError, ValueError, OverflowError):\n",
- "        #Only conversion failures become NaN; unrelated errors still surface\n",
- "        return np.nan\n",
- "\n",
- "#Apply the to_float function to all values in the budget column.\n",
- "#assign() returns a new DataFrame, which avoids the SettingWithCopyWarning\n",
- "small_df = small_df.assign(budget=small_df['budget'].apply(to_float))\n",
- "\n",
- "#Converting with pandas astype now succeeds because unparseable entries are NaN\n",
- "small_df = small_df.astype({'budget': 'float'})\n",
- "\n",
- "#Get the data types for all features\n",
- "small_df.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/2397457688.py:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " small_df['release_date'] = pd.to_datetime(small_df['release_date'], errors='coerce')\n",
- "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/2397457688.py:5: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " small_df['year'] = small_df['release_date'].apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)\n"
- ]
- }
- ],
- "source": [
- "#Convert release_date into pandas datetime format (unparseable dates become NaT)\n",
- "small_df = small_df.assign(\n",
- "    release_date=pd.to_datetime(small_df['release_date'], errors='coerce')\n",
- ")\n",
- "\n",
- "#Extract year from the datetime as a string, leaving missing dates as NaN.\n",
- "#pd.notna is required here: NaN/NaT never compare equal, so x != np.nan is always True\n",
- "small_df = small_df.assign(\n",
- "    year=small_df['release_date'].apply(\n",
- "        lambda x: str(x).split('-')[0] if pd.notna(x) else np.nan\n",
- "    )\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>title</th>\n",
- " <th>release_date</th>\n",
- " <th>budget</th>\n",
- " <th>revenue</th>\n",
- " <th>runtime</th>\n",
- " <th>genres</th>\n",
- " <th>year</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>34940</th>\n",
- " <td>Passage of Venus</td>\n",
- " <td>1874-12-09</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.0</td>\n",
- " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
- " <td>1874</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>34937</th>\n",
- " <td>Sallie Gardner at a Gallop</td>\n",
- " <td>1878-06-14</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.0</td>\n",
- " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
- " <td>1878</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>41602</th>\n",
- " <td>Buffalo Running</td>\n",
- " <td>1883-11-19</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.0</td>\n",
- " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
- " <td>1883</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>34933</th>\n",
- " <td>Man Walking Around a Corner</td>\n",
- " <td>1887-08-18</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.0</td>\n",
- " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
- " <td>1887</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>34938</th>\n",
- " <td>Traffic Crossing Leeds Bridge</td>\n",
- " <td>1888-10-15</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.0</td>\n",
- " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
- " <td>1888</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " title release_date budget revenue runtime \\\n",
- "34940 Passage of Venus 1874-12-09 0.0 0.0 1.0 \n",
- "34937 Sallie Gardner at a Gallop 1878-06-14 0.0 0.0 1.0 \n",
- "41602 Buffalo Running 1883-11-19 0.0 0.0 1.0 \n",
- "34933 Man Walking Around a Corner 1887-08-18 0.0 0.0 1.0 \n",
- "34938 Traffic Crossing Leeds Bridge 1888-10-15 0.0 0.0 1.0 \n",
- "\n",
- " genres year \n",
- "34940 [{'id': 99, 'name': 'Documentary'}] 1874 \n",
- "34937 [{'id': 99, 'name': 'Documentary'}] 1878 \n",
- "41602 [{'id': 99, 'name': 'Documentary'}] 1883 \n",
- "34933 [{'id': 99, 'name': 'Documentary'}] 1887 \n",
- "34938 [{'id': 99, 'name': 'Documentary'}] 1888 "
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Order the rows by release year, earliest first\n",
- "small_df = small_df.sort_values(by='year', ascending=True)\n",
- "\n",
- "#Show the first five rows\n",
- "small_df.head(n=5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>title</th>\n",
- " <th>release_date</th>\n",
- " <th>budget</th>\n",
- " <th>revenue</th>\n",
- " <th>runtime</th>\n",
- " <th>genres</th>\n",
- " <th>year</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>14551</th>\n",
- " <td>Avatar</td>\n",
- " <td>2009-12-10</td>\n",
- " <td>237000000.0</td>\n",
- " <td>2.787965e+09</td>\n",
- " <td>162.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2009</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>26555</th>\n",
- " <td>Star Wars: The Force Awakens</td>\n",
- " <td>2015-12-15</td>\n",
- " <td>245000000.0</td>\n",
- " <td>2.068224e+09</td>\n",
- " <td>136.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1639</th>\n",
- " <td>Titanic</td>\n",
- " <td>1997-11-18</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.845034e+09</td>\n",
- " <td>194.0</td>\n",
- " <td>[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...</td>\n",
- " <td>1997</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17818</th>\n",
- " <td>The Avengers</td>\n",
- " <td>2012-04-25</td>\n",
- " <td>220000000.0</td>\n",
- " <td>1.519558e+09</td>\n",
- " <td>143.0</td>\n",
- " <td>[{'id': 878, 'name': 'Science Fiction'}, {'id'...</td>\n",
- " <td>2012</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>25084</th>\n",
- " <td>Jurassic World</td>\n",
- " <td>2015-06-09</td>\n",
- " <td>150000000.0</td>\n",
- " <td>1.513529e+09</td>\n",
- " <td>124.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " title release_date budget revenue \\\n",
- "14551 Avatar 2009-12-10 237000000.0 2.787965e+09 \n",
- "26555 Star Wars: The Force Awakens 2015-12-15 245000000.0 2.068224e+09 \n",
- "1639 Titanic 1997-11-18 200000000.0 1.845034e+09 \n",
- "17818 The Avengers 2012-04-25 220000000.0 1.519558e+09 \n",
- "25084 Jurassic World 2015-06-09 150000000.0 1.513529e+09 \n",
- "\n",
- " runtime genres year \n",
- "14551 162.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2009 \n",
- "26555 136.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
- "1639 194.0 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... 1997 \n",
- "17818 143.0 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2012 \n",
- "25084 124.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 "
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Order the movies from highest to lowest revenue\n",
- "small_df = small_df.sort_values(by='revenue', ascending=False)\n",
- "\n",
- "#Show the first five rows of the re-ordered frame\n",
- "small_df.head(n=5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>title</th>\n",
- " <th>release_date</th>\n",
- " <th>budget</th>\n",
- " <th>revenue</th>\n",
- " <th>runtime</th>\n",
- " <th>genres</th>\n",
- " <th>year</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>14551</th>\n",
- " <td>Avatar</td>\n",
- " <td>2009-12-10</td>\n",
- " <td>237000000.0</td>\n",
- " <td>2.787965e+09</td>\n",
- " <td>162.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2009</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>26555</th>\n",
- " <td>Star Wars: The Force Awakens</td>\n",
- " <td>2015-12-15</td>\n",
- " <td>245000000.0</td>\n",
- " <td>2.068224e+09</td>\n",
- " <td>136.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1639</th>\n",
- " <td>Titanic</td>\n",
- " <td>1997-11-18</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.845034e+09</td>\n",
- " <td>194.0</td>\n",
- " <td>[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...</td>\n",
- " <td>1997</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17818</th>\n",
- " <td>The Avengers</td>\n",
- " <td>2012-04-25</td>\n",
- " <td>220000000.0</td>\n",
- " <td>1.519558e+09</td>\n",
- " <td>143.0</td>\n",
- " <td>[{'id': 878, 'name': 'Science Fiction'}, {'id'...</td>\n",
- " <td>2012</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>25084</th>\n",
- " <td>Jurassic World</td>\n",
- " <td>2015-06-09</td>\n",
- " <td>150000000.0</td>\n",
- " <td>1.513529e+09</td>\n",
- " <td>124.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>28830</th>\n",
- " <td>Furious 7</td>\n",
- " <td>2015-04-01</td>\n",
- " <td>190000000.0</td>\n",
- " <td>1.506249e+09</td>\n",
- " <td>137.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}]</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>26558</th>\n",
- " <td>Avengers: Age of Ultron</td>\n",
- " <td>2015-04-22</td>\n",
- " <td>280000000.0</td>\n",
- " <td>1.405404e+09</td>\n",
- " <td>141.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17437</th>\n",
- " <td>Harry Potter and the Deathly Hallows: Part 2</td>\n",
- " <td>2011-07-07</td>\n",
- " <td>125000000.0</td>\n",
- " <td>1.342000e+09</td>\n",
- " <td>130.0</td>\n",
- " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
- " <td>2011</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>22110</th>\n",
- " <td>Frozen</td>\n",
- " <td>2013-11-27</td>\n",
- " <td>150000000.0</td>\n",
- " <td>1.274219e+09</td>\n",
- " <td>102.0</td>\n",
- " <td>[{'id': 16, 'name': 'Animation'}, {'id': 12, '...</td>\n",
- " <td>2013</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>42222</th>\n",
- " <td>Beauty and the Beast</td>\n",
- " <td>2017-03-16</td>\n",
- " <td>160000000.0</td>\n",
- " <td>1.262886e+09</td>\n",
- " <td>129.0</td>\n",
- " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
- " <td>2017</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>43255</th>\n",
- " <td>The Fate of the Furious</td>\n",
- " <td>2017-04-12</td>\n",
- " <td>250000000.0</td>\n",
- " <td>1.238765e+09</td>\n",
- " <td>136.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...</td>\n",
- " <td>2017</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>20830</th>\n",
- " <td>Iron Man 3</td>\n",
- " <td>2013-04-18</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.215440e+09</td>\n",
- " <td>130.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2013</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>30700</th>\n",
- " <td>Minions</td>\n",
- " <td>2015-06-17</td>\n",
- " <td>74000000.0</td>\n",
- " <td>1.156731e+09</td>\n",
- " <td>91.0</td>\n",
- " <td>[{'id': 10751, 'name': 'Family'}, {'id': 16, '...</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>26567</th>\n",
- " <td>Captain America: Civil War</td>\n",
- " <td>2016-04-27</td>\n",
- " <td>250000000.0</td>\n",
- " <td>1.153304e+09</td>\n",
- " <td>147.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...</td>\n",
- " <td>2016</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17293</th>\n",
- " <td>Transformers: Dark of the Moon</td>\n",
- " <td>2011-06-28</td>\n",
- " <td>195000000.0</td>\n",
- " <td>1.123747e+09</td>\n",
- " <td>154.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...</td>\n",
- " <td>2011</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7000</th>\n",
- " <td>The Lord of the Rings: The Return of the King</td>\n",
- " <td>2003-12-01</td>\n",
- " <td>94000000.0</td>\n",
- " <td>1.118889e+09</td>\n",
- " <td>201.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
- " <td>2003</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>19261</th>\n",
- " <td>Skyfall</td>\n",
- " <td>2012-10-25</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.108561e+09</td>\n",
- " <td>143.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2012</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>23617</th>\n",
- " <td>Transformers: Age of Extinction</td>\n",
- " <td>2014-06-25</td>\n",
- " <td>210000000.0</td>\n",
- " <td>1.091405e+09</td>\n",
- " <td>165.0</td>\n",
- " <td>[{'id': 878, 'name': 'Science Fiction'}, {'id'...</td>\n",
- " <td>2014</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>18252</th>\n",
- " <td>The Dark Knight Rises</td>\n",
- " <td>2012-07-16</td>\n",
- " <td>250000000.0</td>\n",
- " <td>1.084939e+09</td>\n",
- " <td>165.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...</td>\n",
- " <td>2012</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>15348</th>\n",
- " <td>Toy Story 3</td>\n",
- " <td>2010-06-16</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.066970e+09</td>\n",
- " <td>103.0</td>\n",
- " <td>[{'id': 16, 'name': 'Animation'}, {'id': 10751...</td>\n",
- " <td>2010</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11008</th>\n",
- " <td>Pirates of the Caribbean: Dead Man's Chest</td>\n",
- " <td>2006-06-20</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.065660e+09</td>\n",
- " <td>151.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
- " <td>2006</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>41489</th>\n",
- " <td>Rogue One: A Star Wars Story</td>\n",
- " <td>2016-12-14</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.056057e+09</td>\n",
- " <td>133.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
- " <td>2016</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17124</th>\n",
- " <td>Pirates of the Caribbean: On Stranger Tides</td>\n",
- " <td>2011-05-14</td>\n",
- " <td>380000000.0</td>\n",
- " <td>1.045714e+09</td>\n",
- " <td>136.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...</td>\n",
- " <td>2011</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>38176</th>\n",
- " <td>Finding Dory</td>\n",
- " <td>2016-06-16</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.028571e+09</td>\n",
- " <td>97.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...</td>\n",
- " <td>2016</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14892</th>\n",
- " <td>Alice in Wonderland</td>\n",
- " <td>2010-03-03</td>\n",
- " <td>200000000.0</td>\n",
- " <td>1.025491e+09</td>\n",
- " <td>108.0</td>\n",
- " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
- " <td>2010</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>36253</th>\n",
- " <td>Zootopia</td>\n",
- " <td>2016-02-11</td>\n",
- " <td>150000000.0</td>\n",
- " <td>1.023784e+09</td>\n",
- " <td>108.0</td>\n",
- " <td>[{'id': 16, 'name': 'Animation'}, {'id': 12, '...</td>\n",
- " <td>2016</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>19971</th>\n",
- " <td>The Hobbit: An Unexpected Journey</td>\n",
- " <td>2012-11-26</td>\n",
- " <td>250000000.0</td>\n",
- " <td>1.021104e+09</td>\n",
- " <td>169.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
- " <td>2012</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>44009</th>\n",
- " <td>Despicable Me 3</td>\n",
- " <td>2017-06-15</td>\n",
- " <td>80000000.0</td>\n",
- " <td>1.020063e+09</td>\n",
- " <td>96.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 16, 'nam...</td>\n",
- " <td>2017</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12481</th>\n",
- " <td>The Dark Knight</td>\n",
- " <td>2008-07-16</td>\n",
- " <td>185000000.0</td>\n",
- " <td>1.004558e+09</td>\n",
- " <td>152.0</td>\n",
- " <td>[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...</td>\n",
- " <td>2008</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " title release_date \\\n",
- "14551 Avatar 2009-12-10 \n",
- "26555 Star Wars: The Force Awakens 2015-12-15 \n",
- "1639 Titanic 1997-11-18 \n",
- "17818 The Avengers 2012-04-25 \n",
- "25084 Jurassic World 2015-06-09 \n",
- "28830 Furious 7 2015-04-01 \n",
- "26558 Avengers: Age of Ultron 2015-04-22 \n",
- "17437 Harry Potter and the Deathly Hallows: Part 2 2011-07-07 \n",
- "22110 Frozen 2013-11-27 \n",
- "42222 Beauty and the Beast 2017-03-16 \n",
- "43255 The Fate of the Furious 2017-04-12 \n",
- "20830 Iron Man 3 2013-04-18 \n",
- "30700 Minions 2015-06-17 \n",
- "26567 Captain America: Civil War 2016-04-27 \n",
- "17293 Transformers: Dark of the Moon 2011-06-28 \n",
- "7000 The Lord of the Rings: The Return of the King 2003-12-01 \n",
- "19261 Skyfall 2012-10-25 \n",
- "23617 Transformers: Age of Extinction 2014-06-25 \n",
- "18252 The Dark Knight Rises 2012-07-16 \n",
- "15348 Toy Story 3 2010-06-16 \n",
- "11008 Pirates of the Caribbean: Dead Man's Chest 2006-06-20 \n",
- "41489 Rogue One: A Star Wars Story 2016-12-14 \n",
- "17124 Pirates of the Caribbean: On Stranger Tides 2011-05-14 \n",
- "38176 Finding Dory 2016-06-16 \n",
- "14892 Alice in Wonderland 2010-03-03 \n",
- "36253 Zootopia 2016-02-11 \n",
- "19971 The Hobbit: An Unexpected Journey 2012-11-26 \n",
- "44009 Despicable Me 3 2017-06-15 \n",
- "12481 The Dark Knight 2008-07-16 \n",
- "\n",
- " budget revenue runtime \\\n",
- "14551 237000000.0 2.787965e+09 162.0 \n",
- "26555 245000000.0 2.068224e+09 136.0 \n",
- "1639 200000000.0 1.845034e+09 194.0 \n",
- "17818 220000000.0 1.519558e+09 143.0 \n",
- "25084 150000000.0 1.513529e+09 124.0 \n",
- "28830 190000000.0 1.506249e+09 137.0 \n",
- "26558 280000000.0 1.405404e+09 141.0 \n",
- "17437 125000000.0 1.342000e+09 130.0 \n",
- "22110 150000000.0 1.274219e+09 102.0 \n",
- "42222 160000000.0 1.262886e+09 129.0 \n",
- "43255 250000000.0 1.238765e+09 136.0 \n",
- "20830 200000000.0 1.215440e+09 130.0 \n",
- "30700 74000000.0 1.156731e+09 91.0 \n",
- "26567 250000000.0 1.153304e+09 147.0 \n",
- "17293 195000000.0 1.123747e+09 154.0 \n",
- "7000 94000000.0 1.118889e+09 201.0 \n",
- "19261 200000000.0 1.108561e+09 143.0 \n",
- "23617 210000000.0 1.091405e+09 165.0 \n",
- "18252 250000000.0 1.084939e+09 165.0 \n",
- "15348 200000000.0 1.066970e+09 103.0 \n",
- "11008 200000000.0 1.065660e+09 151.0 \n",
- "41489 200000000.0 1.056057e+09 133.0 \n",
- "17124 380000000.0 1.045714e+09 136.0 \n",
- "38176 200000000.0 1.028571e+09 97.0 \n",
- "14892 200000000.0 1.025491e+09 108.0 \n",
- "36253 150000000.0 1.023784e+09 108.0 \n",
- "19971 250000000.0 1.021104e+09 169.0 \n",
- "44009 80000000.0 1.020063e+09 96.0 \n",
- "12481 185000000.0 1.004558e+09 152.0 \n",
- "\n",
- " genres year \n",
- "14551 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2009 \n",
- "26555 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
- "1639 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... 1997 \n",
- "17818 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2012 \n",
- "25084 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
- "28830 [{'id': 28, 'name': 'Action'}] 2015 \n",
- "26558 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
- "17437 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2011 \n",
- "22110 [{'id': 16, 'name': 'Animation'}, {'id': 12, '... 2013 \n",
- "42222 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2017 \n",
- "43255 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... 2017 \n",
- "20830 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2013 \n",
- "30700 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... 2015 \n",
- "26567 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... 2016 \n",
- "17293 [{'id': 28, 'name': 'Action'}, {'id': 878, 'na... 2011 \n",
- "7000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2003 \n",
- "19261 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2012 \n",
- "23617 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2014 \n",
- "18252 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... 2012 \n",
- "15348 [{'id': 16, 'name': 'Animation'}, {'id': 10751... 2010 \n",
- "11008 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2006 \n",
- "41489 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2016 \n",
- "17124 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... 2011 \n",
- "38176 [{'id': 12, 'name': 'Adventure'}, {'id': 16, '... 2016 \n",
- "14892 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2010 \n",
- "36253 [{'id': 16, 'name': 'Animation'}, {'id': 12, '... 2016 \n",
- "19971 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2012 \n",
- "44009 [{'id': 28, 'name': 'Action'}, {'id': 16, 'nam... 2017 \n",
- "12481 [{'id': 18, 'name': 'Drama'}, {'id': 28, 'name... 2008 "
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Keep only the movies whose revenue exceeds 1 billion\n",
- "new = small_df.loc[small_df['revenue'].gt(1e9)]\n",
- "\n",
- "new"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>title</th>\n",
- " <th>release_date</th>\n",
- " <th>budget</th>\n",
- " <th>revenue</th>\n",
- " <th>runtime</th>\n",
- " <th>genres</th>\n",
- " <th>year</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>17437</th>\n",
- " <td>Harry Potter and the Deathly Hallows: Part 2</td>\n",
- " <td>2011-07-07</td>\n",
- " <td>125000000.0</td>\n",
- " <td>1.342000e+09</td>\n",
- " <td>130.0</td>\n",
- " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
- " <td>2011</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>30700</th>\n",
- " <td>Minions</td>\n",
- " <td>2015-06-17</td>\n",
- " <td>74000000.0</td>\n",
- " <td>1.156731e+09</td>\n",
- " <td>91.0</td>\n",
- " <td>[{'id': 10751, 'name': 'Family'}, {'id': 16, '...</td>\n",
- " <td>2015</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7000</th>\n",
- " <td>The Lord of the Rings: The Return of the King</td>\n",
- " <td>2003-12-01</td>\n",
- " <td>94000000.0</td>\n",
- " <td>1.118889e+09</td>\n",
- " <td>201.0</td>\n",
- " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
- " <td>2003</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>44009</th>\n",
- " <td>Despicable Me 3</td>\n",
- " <td>2017-06-15</td>\n",
- " <td>80000000.0</td>\n",
- " <td>1.020063e+09</td>\n",
- " <td>96.0</td>\n",
- " <td>[{'id': 28, 'name': 'Action'}, {'id': 16, 'nam...</td>\n",
- " <td>2017</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " title release_date \\\n",
- "17437 Harry Potter and the Deathly Hallows: Part 2 2011-07-07 \n",
- "30700 Minions 2015-06-17 \n",
- "7000 The Lord of the Rings: The Return of the King 2003-12-01 \n",
- "44009 Despicable Me 3 2017-06-15 \n",
- "\n",
- " budget revenue runtime \\\n",
- "17437 125000000.0 1.342000e+09 130.0 \n",
- "30700 74000000.0 1.156731e+09 91.0 \n",
- "7000 94000000.0 1.118889e+09 201.0 \n",
- "44009 80000000.0 1.020063e+09 96.0 \n",
- "\n",
- " genres year \n",
- "17437 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2011 \n",
- "30700 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... 2015 \n",
- "7000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2003 \n",
- "44009 [{'id': 28, 'name': 'Action'}, {'id': 16, 'nam... 2017 "
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Movies that earned more than 1 billion while spending less than 150 million\n",
- "new2 = small_df.loc[small_df['revenue'].gt(1e9) & small_df['budget'].lt(1.5e8)]\n",
- "\n",
- "new2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "pandas.core.series.Series"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Check the type of the object returned when a single column is selected\n",
- "type(small_df.loc[:, 'year'])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1256.0\n",
- "0.0\n"
- ]
- }
- ],
- "source": [
- "\n",
- "#Pull out the runtime column as a Series\n",
- "runtime = small_df['runtime']\n",
- "\n",
- "#Print the longest runtime of any movie, then the shortest one\n",
- "for extreme in (runtime.max(), runtime.min()):\n",
- "    print(extreme)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "4224578.813474693\n",
- "0.0\n"
- ]
- }
- ],
- "source": [
- "#Pull out the budget column as a Series\n",
- "budget = small_df['budget']\n",
- "\n",
- "#Print the mean budget of the movies, then the median budget\n",
- "for statistic in (budget.mean, budget.median):\n",
- "    print(statistic())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "8267610.399999982"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Pull out the revenue column as a Series\n",
- "revenue = small_df['revenue']\n",
- "\n",
- "#Revenue value at the 90th percentile of the movies\n",
- "revenue.quantile(q=0.9)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2014 1974\n",
- "2015 1905\n",
- "2013 1889\n",
- "2012 1722\n",
- "2011 1667\n",
- " ... \n",
- "1887 1\n",
- "1883 1\n",
- "1893 1\n",
- "2020 1\n",
- "1878 1\n",
- "Name: year, Length: 136, dtype: int64"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Count how many movies were released in each year, most frequent year first\n",
- "small_df['year'].value_counts(sort=True, ascending=False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|