{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1.4.3'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "pd.__version__" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
adultbelongs_to_collectionbudgetgenreshomepageidimdb_idoriginal_languageoriginal_titleoverview...release_daterevenueruntimespoken_languagesstatustaglinetitlevideovote_averagevote_count
0False{'id': 10194, 'name': 'Toy Story Collection', ...30000000[{'id': 16, 'name': 'Animation'}, {'id': 35, '...http://toystory.disney.com/toy-story862tt0114709enToy StoryLed by Woody, Andy's toys live happily in his ......1995-10-30373554033.081.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNaNToy StoryFalse7.75415.0
1FalseNaN65000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...NaN8844tt0113497enJumanjiWhen siblings Judy and Peter discover an encha......1995-12-15262797249.0104.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedRoll the dice and unleash the excitement!JumanjiFalse6.92413.0
2False{'id': 119050, 'name': 'Grumpy Old Men Collect...0[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...NaN15602tt0113228enGrumpier Old MenA family wedding reignites the ancient feud be......1995-12-220.0101.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedStill Yelling. Still Fighting. Still Ready for...Grumpier Old MenFalse6.592.0
3FalseNaN16000000[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...NaN31357tt0114885enWaiting to ExhaleCheated on, mistreated and stepped on, the wom......1995-12-2281452156.0127.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedFriends are the people who let you be yourself...Waiting to ExhaleFalse6.134.0
4False{'id': 96871, 'name': 'Father of the Bride Col...0[{'id': 35, 'name': 'Comedy'}]NaN11862tt0113041enFather of the Bride Part IIJust when George Banks has recovered from his ......1995-02-1076578911.0106.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedJust When His World Is Back To Normal... He's ...Father of the Bride Part IIFalse5.7173.0
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " adult belongs_to_collection budget \\\n", "0 False {'id': 10194, 'name': 'Toy Story Collection', ... 30000000 \n", "1 False NaN 65000000 \n", "2 False {'id': 119050, 'name': 'Grumpy Old Men Collect... 0 \n", "3 False NaN 16000000 \n", "4 False {'id': 96871, 'name': 'Father of the Bride Col... 0 \n", "\n", " genres \\\n", "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n", "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n", "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "4 [{'id': 35, 'name': 'Comedy'}] \n", "\n", " homepage id imdb_id original_language \\\n", "0 http://toystory.disney.com/toy-story 862 tt0114709 en \n", "1 NaN 8844 tt0113497 en \n", "2 NaN 15602 tt0113228 en \n", "3 NaN 31357 tt0114885 en \n", "4 NaN 11862 tt0113041 en \n", "\n", " original_title \\\n", "0 Toy Story \n", "1 Jumanji \n", "2 Grumpier Old Men \n", "3 Waiting to Exhale \n", "4 Father of the Bride Part II \n", "\n", " overview ... release_date \\\n", "0 Led by Woody, Andy's toys live happily in his ... ... 1995-10-30 \n", "1 When siblings Judy and Peter discover an encha... ... 1995-12-15 \n", "2 A family wedding reignites the ancient feud be... ... 1995-12-22 \n", "3 Cheated on, mistreated and stepped on, the wom... ... 1995-12-22 \n", "4 Just when George Banks has recovered from his ... ... 1995-02-10 \n", "\n", " revenue runtime spoken_languages \\\n", "0 373554033.0 81.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "1 262797249.0 104.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n", "2 0.0 101.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "3 81452156.0 127.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "4 76578911.0 106.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "\n", " status tagline \\\n", "0 Released NaN \n", "1 Released Roll the dice and unleash the excitement! \n", "2 Released Still Yelling. Still Fighting. Still Ready for... 
\n", "3 Released Friends are the people who let you be yourself... \n", "4 Released Just When His World Is Back To Normal... He's ... \n", "\n", " title video vote_average vote_count \n", "0 Toy Story False 7.7 5415.0 \n", "1 Jumanji False 6.9 2413.0 \n", "2 Grumpier Old Men False 6.5 92.0 \n", "3 Waiting to Exhale False 6.1 34.0 \n", "4 Father of the Bride Part II False 5.7 173.0 \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Read the CSV File into df\n", "df = pd.read_csv('../data/movies_metadata.csv', low_memory=False)\n", "\n", "#We will find out what the following code does a little later!\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Output the type of df\n", "type(df)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(45466, 24)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Output the shape of df\n", "df.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',\n", " 'imdb_id', 'original_language', 'original_title', 'overview',\n", " 'popularity', 'poster_path', 'production_companies',\n", " 'production_countries', 'release_date', 'revenue', 'runtime',\n", " 'spoken_languages', 'status', 'tagline', 'title', 'video',\n", " 'vote_average', 'vote_count'],\n", " dtype='object')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Output the columns of df\n", "df.columns" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "adult False\n", 
"belongs_to_collection NaN\n", "budget 65000000\n", "genres [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...\n", "homepage NaN\n", "id 8844\n", "imdb_id tt0113497\n", "original_language en\n", "original_title Jumanji\n", "overview When siblings Judy and Peter discover an encha...\n", "popularity 17.015539\n", "poster_path /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg\n", "production_companies [{'name': 'TriStar Pictures', 'id': 559}, {'na...\n", "production_countries [{'iso_3166_1': 'US', 'name': 'United States o...\n", "release_date 1995-12-15\n", "revenue 262797249.0\n", "runtime 104.0\n", "spoken_languages [{'iso_639_1': 'en', 'name': 'English'}, {'iso...\n", "status Released\n", "tagline Roll the dice and unleash the excitement!\n", "title Jumanji\n", "video False\n", "vote_average 6.9\n", "vote_count 2413.0\n", "Name: 1, dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Select the second movie in df\n", "second = df.iloc[1]\n", "second" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "adult False\n", "belongs_to_collection NaN\n", "budget 65000000\n", "genres [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...\n", "homepage NaN\n", "id 8844\n", "imdb_id tt0113497\n", "original_language en\n", "original_title Jumanji\n", "overview When siblings Judy and Peter discover an encha...\n", "popularity 17.015539\n", "poster_path /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg\n", "production_companies [{'name': 'TriStar Pictures', 'id': 559}, {'na...\n", "production_countries [{'iso_3166_1': 'US', 'name': 'United States o...\n", "release_date 1995-12-15\n", "revenue 262797249.0\n", "runtime 104.0\n", "spoken_languages [{'iso_639_1': 'en', 'name': 'English'}, {'iso...\n", "status Released\n", "tagline Roll the dice and unleash the excitement!\n", "video False\n", "vote_average 6.9\n", "vote_count 2413.0\n", "Name: Jumanji, dtype: object" ] }, "execution_count": 7, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "#Change the index to the title\n", "df = df.set_index('title')\n", "\n", "#Access the movie with title 'Jumanji'\n", "jum = df.loc['Jumanji']\n", "jum" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df = df.reset_index()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_datebudgetrevenueruntimegenres
0Toy Story1995-10-3030000000373554033.081.0[{'id': 16, 'name': 'Animation'}, {'id': 35, '...
1Jumanji1995-12-1565000000262797249.0104.0[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...
2Grumpier Old Men1995-12-2200.0101.0[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...
3Waiting to Exhale1995-12-221600000081452156.0127.0[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...
4Father of the Bride Part II1995-02-10076578911.0106.0[{'id': 35, 'name': 'Comedy'}]
\n", "
" ], "text/plain": [ " title release_date budget revenue runtime \\\n", "0 Toy Story 1995-10-30 30000000 373554033.0 81.0 \n", "1 Jumanji 1995-12-15 65000000 262797249.0 104.0 \n", "2 Grumpier Old Men 1995-12-22 0 0.0 101.0 \n", "3 Waiting to Exhale 1995-12-22 16000000 81452156.0 127.0 \n", "4 Father of the Bride Part II 1995-02-10 0 76578911.0 106.0 \n", "\n", " genres \n", "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n", "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n", "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "4 [{'id': 35, 'name': 'Comedy'}] " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Create a smaller dataframe with a subset of all features\n", "small_df = df[['title', 'release_date', 'budget', 'revenue', 'runtime', 'genres']]\n", "\n", "#Output only the first 5 rows of small_df\n", "small_df.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_datebudgetrevenueruntimegenres
0Toy Story1995-10-3030000000373554033.081.0[{'id': 16, 'name': 'Animation'}, {'id': 35, '...
1Jumanji1995-12-1565000000262797249.0104.0[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...
2Grumpier Old Men1995-12-2200.0101.0[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...
3Waiting to Exhale1995-12-221600000081452156.0127.0[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...
4Father of the Bride Part II1995-02-10076578911.0106.0[{'id': 35, 'name': 'Comedy'}]
5Heat1995-12-1560000000187436818.0170.0[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...
6Sabrina1995-12-15580000000.0127.0[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...
7Tom and Huck1995-12-2200.097.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
8Sudden Death1995-12-223500000064350171.0106.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
9GoldenEye1995-11-1658000000352194034.0130.0[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...
10The American President1995-11-1762000000107879496.0106.0[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...
11Dracula: Dead and Loving It1995-12-2200.088.0[{'id': 35, 'name': 'Comedy'}, {'id': 27, 'nam...
12Balto1995-12-22011348324.078.0[{'id': 10751, 'name': 'Family'}, {'id': 16, '...
13Nixon1995-12-224400000013681765.0192.0[{'id': 36, 'name': 'History'}, {'id': 18, 'na...
14Cutthroat Island1995-12-229800000010017322.0119.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
\n", "
" ], "text/plain": [ " title release_date budget revenue runtime \\\n", "0 Toy Story 1995-10-30 30000000 373554033.0 81.0 \n", "1 Jumanji 1995-12-15 65000000 262797249.0 104.0 \n", "2 Grumpier Old Men 1995-12-22 0 0.0 101.0 \n", "3 Waiting to Exhale 1995-12-22 16000000 81452156.0 127.0 \n", "4 Father of the Bride Part II 1995-02-10 0 76578911.0 106.0 \n", "5 Heat 1995-12-15 60000000 187436818.0 170.0 \n", "6 Sabrina 1995-12-15 58000000 0.0 127.0 \n", "7 Tom and Huck 1995-12-22 0 0.0 97.0 \n", "8 Sudden Death 1995-12-22 35000000 64350171.0 106.0 \n", "9 GoldenEye 1995-11-16 58000000 352194034.0 130.0 \n", "10 The American President 1995-11-17 62000000 107879496.0 106.0 \n", "11 Dracula: Dead and Loving It 1995-12-22 0 0.0 88.0 \n", "12 Balto 1995-12-22 0 11348324.0 78.0 \n", "13 Nixon 1995-12-22 44000000 13681765.0 192.0 \n", "14 Cutthroat Island 1995-12-22 98000000 10017322.0 119.0 \n", "\n", " genres \n", "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n", "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n", "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "4 [{'id': 35, 'name': 'Comedy'}] \n", "5 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... \n", "6 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '... \n", "7 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "8 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "9 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n", "10 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "11 [{'id': 35, 'name': 'Comedy'}, {'id': 27, 'nam... \n", "12 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... \n", "13 [{'id': 36, 'name': 'History'}, {'id': 18, 'na... \n", "14 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 
" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Display the first 15 rows\n", "small_df.head(15)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 45466 entries, 0 to 45465\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 title 45460 non-null object \n", " 1 release_date 45379 non-null object \n", " 2 budget 45466 non-null object \n", " 3 revenue 45460 non-null float64\n", " 4 runtime 45203 non-null float64\n", " 5 genres 45466 non-null object \n", "dtypes: float64(2), object(4)\n", "memory usage: 2.1+ MB\n" ] } ], "source": [ "#Get information of the data types of each feature\n", "small_df.info()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "could not convert string to float: '/ff9qCepilowshEtG2GYWwzt2bs4.jpg'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Input \u001b[0;32mIn [13]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbudget\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfloat\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/generic.py:5912\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 5905\u001b[0m results \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 
5906\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[:, i]\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 5907\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[1;32m 5908\u001b[0m ]\n\u001b[1;32m 5910\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 5911\u001b[0m \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[0;32m-> 5912\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5913\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor(new_data)\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5915\u001b[0m \u001b[38;5;66;03m# GH 33113: handle empty frame or series\u001b[39;00m\n", "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/managers.py:419\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mastype\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, dtype, copy: 
\u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, errors: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[0;32m--> 419\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/managers.py:304\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[0;34m(self, f, align_keys, ignore_failures, **kwargs)\u001b[0m\n\u001b[1;32m 302\u001b[0m applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 304\u001b[0m applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, 
\u001b[38;5;167;01mNotImplementedError\u001b[39;00m):\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ignore_failures:\n", "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/blocks.py:580\u001b[0m, in \u001b[0;36mBlock.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 563\u001b[0m \u001b[38;5;124;03mCoerce to the new dtype.\u001b[39;00m\n\u001b[1;32m 564\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 576\u001b[0m \u001b[38;5;124;03mBlock\u001b[39;00m\n\u001b[1;32m 577\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 578\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m--> 580\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 582\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[1;32m 583\u001b[0m newb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmake_block(new_values)\n", "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1292\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 1289\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtype\u001b[38;5;241m.\u001b[39mnumpy_dtype\n\u001b[1;32m 1291\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1292\u001b[0m new_values 
\u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# e.g. astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# trying to convert to float\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1237\u001b[0m, in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 1234\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1237\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n", "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1181\u001b[0m, in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 1177\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 1179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;129;01mor\u001b[39;00m is_object_dtype(arr\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mor\u001b[39;00m is_object_dtype(dtype):\n\u001b[1;32m 1180\u001b[0m \u001b[38;5;66;03m# Explicit copy, or required since NumPy can't view from / to object.\u001b[39;00m\n\u001b[0;32m-> 1181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n", "\u001b[0;31mValueError\u001b[0m: could not convert string to float: '/ff9qCepilowshEtG2GYWwzt2bs4.jpg'" ] } ], "source": [ "df['budget'].astype('float')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 45466 entries, 0 to 45465\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 title 45460 non-null object \n", " 1 release_date 45379 non-null object \n", " 2 budget 45463 non-null float64\n", " 3 revenue 45460 non-null float64\n", " 4 runtime 45203 non-null float64\n", " 5 genres 45466 non-null object \n", 
"dtypes: float64(3), object(3)\n", "memory usage: 2.1+ MB\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/1765380320.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " small_df['budget'] = small_df['budget'].apply(to_float)\n", "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/1765380320.py:16: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " small_df['budget'] = small_df['budget'].astype('float')\n" ] } ], "source": [ "#Import the numpy library \n", "import numpy as np\n", "\n", "#Function to convert to float manually\n", "def to_float(x):\n", " try:\n", " x = float(x)\n", " except: \n", " x = np.nan\n", " return x\n", "\n", "#Apply the to_float function to all values in the budget column\n", "small_df['budget'] = small_df['budget'].apply(to_float)\n", "\n", "#Try converting to float using pandas astype\n", "small_df['budget'] = small_df['budget'].astype('float')\n", "\n", "#Get the data types for all features\n", "small_df.info()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/2397457688.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the 
documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " small_df['release_date'] = pd.to_datetime(small_df['release_date'], errors='coerce')\n", "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/2397457688.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " small_df['year'] = small_df['release_date'].apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)\n" ] } ], "source": [ "#Convert release_date into pandas datetime format\n", "small_df['release_date'] = pd.to_datetime(small_df['release_date'], errors='coerce')\n", "\n", "#Extract year from the datetime; missing dates (NaT) become NaN.\n", "#Missing-ness is tested with pd.notna because `x != np.nan` is always True.\n", "small_df['year'] = small_df['release_date'].apply(lambda x: str(x).split('-')[0] if pd.notna(x) else np.nan)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_datebudgetrevenueruntimegenresyear
34940Passage of Venus1874-12-090.00.01.0[{'id': 99, 'name': 'Documentary'}]1874
34937Sallie Gardner at a Gallop1878-06-140.00.01.0[{'id': 99, 'name': 'Documentary'}]1878
41602Buffalo Running1883-11-190.00.01.0[{'id': 99, 'name': 'Documentary'}]1883
34933Man Walking Around a Corner1887-08-180.00.01.0[{'id': 99, 'name': 'Documentary'}]1887
34938Traffic Crossing Leeds Bridge1888-10-150.00.01.0[{'id': 99, 'name': 'Documentary'}]1888
\n", "
" ], "text/plain": [ " title release_date budget revenue runtime \\\n", "34940 Passage of Venus 1874-12-09 0.0 0.0 1.0 \n", "34937 Sallie Gardner at a Gallop 1878-06-14 0.0 0.0 1.0 \n", "41602 Buffalo Running 1883-11-19 0.0 0.0 1.0 \n", "34933 Man Walking Around a Corner 1887-08-18 0.0 0.0 1.0 \n", "34938 Traffic Crossing Leeds Bridge 1888-10-15 0.0 0.0 1.0 \n", "\n", " genres year \n", "34940 [{'id': 99, 'name': 'Documentary'}] 1874 \n", "34937 [{'id': 99, 'name': 'Documentary'}] 1878 \n", "41602 [{'id': 99, 'name': 'Documentary'}] 1883 \n", "34933 [{'id': 99, 'name': 'Documentary'}] 1887 \n", "34938 [{'id': 99, 'name': 'Documentary'}] 1888 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Sort DataFrame based on release year\n", "small_df = small_df.sort_values('year')\n", "\n", "small_df.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_datebudgetrevenueruntimegenresyear
14551Avatar2009-12-10237000000.02.787965e+09162.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2009
26555Star Wars: The Force Awakens2015-12-15245000000.02.068224e+09136.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2015
1639Titanic1997-11-18200000000.01.845034e+09194.0[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...1997
17818The Avengers2012-04-25220000000.01.519558e+09143.0[{'id': 878, 'name': 'Science Fiction'}, {'id'...2012
25084Jurassic World2015-06-09150000000.01.513529e+09124.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2015
\n", "
" ], "text/plain": [ " title release_date budget revenue \\\n", "14551 Avatar 2009-12-10 237000000.0 2.787965e+09 \n", "26555 Star Wars: The Force Awakens 2015-12-15 245000000.0 2.068224e+09 \n", "1639 Titanic 1997-11-18 200000000.0 1.845034e+09 \n", "17818 The Avengers 2012-04-25 220000000.0 1.519558e+09 \n", "25084 Jurassic World 2015-06-09 150000000.0 1.513529e+09 \n", "\n", " runtime genres year \n", "14551 162.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2009 \n", "26555 136.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n", "1639 194.0 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... 1997 \n", "17818 143.0 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2012 \n", "25084 124.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Re-order the movies so the highest revenue comes first\n", "small_df = small_df.sort_values(by='revenue', ascending=False)\n", "\n", "# Preview the five top-grossing rows\n", "small_df.head(5)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_datebudgetrevenueruntimegenresyear
14551Avatar2009-12-10237000000.02.787965e+09162.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2009
26555Star Wars: The Force Awakens2015-12-15245000000.02.068224e+09136.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2015
1639Titanic1997-11-18200000000.01.845034e+09194.0[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...1997
17818The Avengers2012-04-25220000000.01.519558e+09143.0[{'id': 878, 'name': 'Science Fiction'}, {'id'...2012
25084Jurassic World2015-06-09150000000.01.513529e+09124.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2015
28830Furious 72015-04-01190000000.01.506249e+09137.0[{'id': 28, 'name': 'Action'}]2015
26558Avengers: Age of Ultron2015-04-22280000000.01.405404e+09141.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2015
17437Harry Potter and the Deathly Hallows: Part 22011-07-07125000000.01.342000e+09130.0[{'id': 10751, 'name': 'Family'}, {'id': 14, '...2011
22110Frozen2013-11-27150000000.01.274219e+09102.0[{'id': 16, 'name': 'Animation'}, {'id': 12, '...2013
42222Beauty and the Beast2017-03-16160000000.01.262886e+09129.0[{'id': 10751, 'name': 'Family'}, {'id': 14, '...2017
43255The Fate of the Furious2017-04-12250000000.01.238765e+09136.0[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...2017
20830Iron Man 32013-04-18200000000.01.215440e+09130.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2013
30700Minions2015-06-1774000000.01.156731e+0991.0[{'id': 10751, 'name': 'Family'}, {'id': 16, '...2015
26567Captain America: Civil War2016-04-27250000000.01.153304e+09147.0[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...2016
17293Transformers: Dark of the Moon2011-06-28195000000.01.123747e+09154.0[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...2011
7000The Lord of the Rings: The Return of the King2003-12-0194000000.01.118889e+09201.0[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...2003
19261Skyfall2012-10-25200000000.01.108561e+09143.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2012
23617Transformers: Age of Extinction2014-06-25210000000.01.091405e+09165.0[{'id': 878, 'name': 'Science Fiction'}, {'id'...2014
18252The Dark Knight Rises2012-07-16250000000.01.084939e+09165.0[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...2012
15348Toy Story 32010-06-16200000000.01.066970e+09103.0[{'id': 16, 'name': 'Animation'}, {'id': 10751...2010
11008Pirates of the Caribbean: Dead Man's Chest2006-06-20200000000.01.065660e+09151.0[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...2006
41489Rogue One: A Star Wars Story2016-12-14200000000.01.056057e+09133.0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...2016
17124Pirates of the Caribbean: On Stranger Tides2011-05-14380000000.01.045714e+09136.0[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...2011
38176Finding Dory2016-06-16200000000.01.028571e+0997.0[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...2016
14892Alice in Wonderland2010-03-03200000000.01.025491e+09108.0[{'id': 10751, 'name': 'Family'}, {'id': 14, '...2010
36253Zootopia2016-02-11150000000.01.023784e+09108.0[{'id': 16, 'name': 'Animation'}, {'id': 12, '...2016
19971The Hobbit: An Unexpected Journey2012-11-26250000000.01.021104e+09169.0[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...2012
44009Despicable Me 32017-06-1580000000.01.020063e+0996.0[{'id': 28, 'name': 'Action'}, {'id': 16, 'nam...2017
12481The Dark Knight2008-07-16185000000.01.004558e+09152.0[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...2008
\n", "
" ], "text/plain": [ " title release_date \\\n", "14551 Avatar 2009-12-10 \n", "26555 Star Wars: The Force Awakens 2015-12-15 \n", "1639 Titanic 1997-11-18 \n", "17818 The Avengers 2012-04-25 \n", "25084 Jurassic World 2015-06-09 \n", "28830 Furious 7 2015-04-01 \n", "26558 Avengers: Age of Ultron 2015-04-22 \n", "17437 Harry Potter and the Deathly Hallows: Part 2 2011-07-07 \n", "22110 Frozen 2013-11-27 \n", "42222 Beauty and the Beast 2017-03-16 \n", "43255 The Fate of the Furious 2017-04-12 \n", "20830 Iron Man 3 2013-04-18 \n", "30700 Minions 2015-06-17 \n", "26567 Captain America: Civil War 2016-04-27 \n", "17293 Transformers: Dark of the Moon 2011-06-28 \n", "7000 The Lord of the Rings: The Return of the King 2003-12-01 \n", "19261 Skyfall 2012-10-25 \n", "23617 Transformers: Age of Extinction 2014-06-25 \n", "18252 The Dark Knight Rises 2012-07-16 \n", "15348 Toy Story 3 2010-06-16 \n", "11008 Pirates of the Caribbean: Dead Man's Chest 2006-06-20 \n", "41489 Rogue One: A Star Wars Story 2016-12-14 \n", "17124 Pirates of the Caribbean: On Stranger Tides 2011-05-14 \n", "38176 Finding Dory 2016-06-16 \n", "14892 Alice in Wonderland 2010-03-03 \n", "36253 Zootopia 2016-02-11 \n", "19971 The Hobbit: An Unexpected Journey 2012-11-26 \n", "44009 Despicable Me 3 2017-06-15 \n", "12481 The Dark Knight 2008-07-16 \n", "\n", " budget revenue runtime \\\n", "14551 237000000.0 2.787965e+09 162.0 \n", "26555 245000000.0 2.068224e+09 136.0 \n", "1639 200000000.0 1.845034e+09 194.0 \n", "17818 220000000.0 1.519558e+09 143.0 \n", "25084 150000000.0 1.513529e+09 124.0 \n", "28830 190000000.0 1.506249e+09 137.0 \n", "26558 280000000.0 1.405404e+09 141.0 \n", "17437 125000000.0 1.342000e+09 130.0 \n", "22110 150000000.0 1.274219e+09 102.0 \n", "42222 160000000.0 1.262886e+09 129.0 \n", "43255 250000000.0 1.238765e+09 136.0 \n", "20830 200000000.0 1.215440e+09 130.0 \n", "30700 74000000.0 1.156731e+09 91.0 \n", "26567 250000000.0 1.153304e+09 147.0 \n", "17293 195000000.0 
1.123747e+09 154.0 \n", "7000 94000000.0 1.118889e+09 201.0 \n", "19261 200000000.0 1.108561e+09 143.0 \n", "23617 210000000.0 1.091405e+09 165.0 \n", "18252 250000000.0 1.084939e+09 165.0 \n", "15348 200000000.0 1.066970e+09 103.0 \n", "11008 200000000.0 1.065660e+09 151.0 \n", "41489 200000000.0 1.056057e+09 133.0 \n", "17124 380000000.0 1.045714e+09 136.0 \n", "38176 200000000.0 1.028571e+09 97.0 \n", "14892 200000000.0 1.025491e+09 108.0 \n", "36253 150000000.0 1.023784e+09 108.0 \n", "19971 250000000.0 1.021104e+09 169.0 \n", "44009 80000000.0 1.020063e+09 96.0 \n", "12481 185000000.0 1.004558e+09 152.0 \n", "\n", " genres year \n", "14551 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2009 \n", "26555 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n", "1639 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... 1997 \n", "17818 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2012 \n", "25084 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n", "28830 [{'id': 28, 'name': 'Action'}] 2015 \n", "26558 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n", "17437 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2011 \n", "22110 [{'id': 16, 'name': 'Animation'}, {'id': 12, '... 2013 \n", "42222 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2017 \n", "43255 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... 2017 \n", "20830 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2013 \n", "30700 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... 2015 \n", "26567 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... 2016 \n", "17293 [{'id': 28, 'name': 'Action'}, {'id': 878, 'na... 2011 \n", "7000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2003 \n", "19261 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2012 \n", "23617 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2014 \n", "18252 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... 2012 \n", "15348 [{'id': 16, 'name': 'Animation'}, {'id': 10751... 
2010 \n", "11008 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2006 \n", "41489 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2016 \n", "17124 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... 2011 \n", "38176 [{'id': 12, 'name': 'Adventure'}, {'id': 16, '... 2016 \n", "14892 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2010 \n", "36253 [{'id': 16, 'name': 'Animation'}, {'id': 12, '... 2016 \n", "19971 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2012 \n", "44009 [{'id': 28, 'name': 'Action'}, {'id': 16, 'nam... 2017 \n", "12481 [{'id': 18, 'name': 'Drama'}, {'id': 28, 'name... 2008 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the rows whose revenue is strictly above one billion\n", "new = small_df.loc[small_df['revenue'].gt(1e9)]\n", "\n", "# Display the filtered frame\n", "new" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerelease_datebudgetrevenueruntimegenresyear
17437Harry Potter and the Deathly Hallows: Part 22011-07-07125000000.01.342000e+09130.0[{'id': 10751, 'name': 'Family'}, {'id': 14, '...2011
30700Minions2015-06-1774000000.01.156731e+0991.0[{'id': 10751, 'name': 'Family'}, {'id': 16, '...2015
7000The Lord of the Rings: The Return of the King2003-12-0194000000.01.118889e+09201.0[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...2003
44009Despicable Me 32017-06-1580000000.01.020063e+0996.0[{'id': 28, 'name': 'Action'}, {'id': 16, 'nam...2017
\n", "
" ], "text/plain": [ " title release_date \\\n", "17437 Harry Potter and the Deathly Hallows: Part 2 2011-07-07 \n", "30700 Minions 2015-06-17 \n", "7000 The Lord of the Rings: The Return of the King 2003-12-01 \n", "44009 Despicable Me 3 2017-06-15 \n", "\n", " budget revenue runtime \\\n", "17437 125000000.0 1.342000e+09 130.0 \n", "30700 74000000.0 1.156731e+09 91.0 \n", "7000 94000000.0 1.118889e+09 201.0 \n", "44009 80000000.0 1.020063e+09 96.0 \n", "\n", " genres year \n", "17437 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2011 \n", "30700 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... 2015 \n", "7000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2003 \n", "44009 [{'id': 28, 'name': 'Action'}, {'id': 16, 'nam... 2017 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows with revenue above one billion AND budget below 150 million\n", "new2 = small_df.loc[small_df['revenue'].gt(1e9) & small_df['budget'].lt(1.5e8)]\n", "\n", "new2" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.series.Series" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the type of object returned when a single column is selected\n", "type(small_df['year'])" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1256.0\n", "0.0\n" ] } ], "source": [ "# Pull the runtime column out as its own Series\n", "runtime = small_df['runtime']\n", "\n", "# Print the longest runtime, then the shortest, one per line\n", "print(runtime.max(), runtime.min(), sep='\\n')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4224578.813474693\n", "0.0\n" ] } ], "source": [ "# Pull the budget column out as its own Series\n", "budget = small_df['budget']\n", "\n", "# Print the mean budget, then the median budget, one per line\n", "print(budget.mean(), budget.median(), sep='\\n')" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8267610.399999982" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pull the revenue column out as its own Series\n", "revenue = small_df['revenue']\n", "\n", "# Revenue value at the 90th percentile\n", "revenue.quantile(q=0.9)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2014 1974\n", "2015 1905\n", "2013 1889\n", "2012 1722\n", "2011 1667\n", " ... \n", "1887 1\n", "1883 1\n", "1893 1\n", "2020 1\n", "1878 1\n", "Name: year, Length: 136, dtype: int64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count the movies per release year, most frequent year first\n", "small_df['year'].value_counts(sort=True, ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 2 }