暂无描述

Chapter2.ipynb 79KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "data": {
  10. "text/plain": [
  11. "'1.4.3'"
  12. ]
  13. },
  14. "execution_count": 1,
  15. "metadata": {},
  16. "output_type": "execute_result"
  17. }
  18. ],
  19. "source": [
  20. "import pandas as pd\n",
  21. "pd.__version__"
  22. ]
  23. },
  24. {
  25. "cell_type": "code",
  26. "execution_count": 2,
  27. "metadata": {},
  28. "outputs": [
  29. {
  30. "data": {
  31. "text/html": [
  32. "<div>\n",
  33. "<style scoped>\n",
  34. " .dataframe tbody tr th:only-of-type {\n",
  35. " vertical-align: middle;\n",
  36. " }\n",
  37. "\n",
  38. " .dataframe tbody tr th {\n",
  39. " vertical-align: top;\n",
  40. " }\n",
  41. "\n",
  42. " .dataframe thead th {\n",
  43. " text-align: right;\n",
  44. " }\n",
  45. "</style>\n",
  46. "<table border=\"1\" class=\"dataframe\">\n",
  47. " <thead>\n",
  48. " <tr style=\"text-align: right;\">\n",
  49. " <th></th>\n",
  50. " <th>adult</th>\n",
  51. " <th>belongs_to_collection</th>\n",
  52. " <th>budget</th>\n",
  53. " <th>genres</th>\n",
  54. " <th>homepage</th>\n",
  55. " <th>id</th>\n",
  56. " <th>imdb_id</th>\n",
  57. " <th>original_language</th>\n",
  58. " <th>original_title</th>\n",
  59. " <th>overview</th>\n",
  60. " <th>...</th>\n",
  61. " <th>release_date</th>\n",
  62. " <th>revenue</th>\n",
  63. " <th>runtime</th>\n",
  64. " <th>spoken_languages</th>\n",
  65. " <th>status</th>\n",
  66. " <th>tagline</th>\n",
  67. " <th>title</th>\n",
  68. " <th>video</th>\n",
  69. " <th>vote_average</th>\n",
  70. " <th>vote_count</th>\n",
  71. " </tr>\n",
  72. " </thead>\n",
  73. " <tbody>\n",
  74. " <tr>\n",
  75. " <th>0</th>\n",
  76. " <td>False</td>\n",
  77. " <td>{'id': 10194, 'name': 'Toy Story Collection', ...</td>\n",
  78. " <td>30000000</td>\n",
  79. " <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
  80. " <td>http://toystory.disney.com/toy-story</td>\n",
  81. " <td>862</td>\n",
  82. " <td>tt0114709</td>\n",
  83. " <td>en</td>\n",
  84. " <td>Toy Story</td>\n",
  85. " <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
  86. " <td>...</td>\n",
  87. " <td>1995-10-30</td>\n",
  88. " <td>373554033.0</td>\n",
  89. " <td>81.0</td>\n",
  90. " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
  91. " <td>Released</td>\n",
  92. " <td>NaN</td>\n",
  93. " <td>Toy Story</td>\n",
  94. " <td>False</td>\n",
  95. " <td>7.7</td>\n",
  96. " <td>5415.0</td>\n",
  97. " </tr>\n",
  98. " <tr>\n",
  99. " <th>1</th>\n",
  100. " <td>False</td>\n",
  101. " <td>NaN</td>\n",
  102. " <td>65000000</td>\n",
  103. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
  104. " <td>NaN</td>\n",
  105. " <td>8844</td>\n",
  106. " <td>tt0113497</td>\n",
  107. " <td>en</td>\n",
  108. " <td>Jumanji</td>\n",
  109. " <td>When siblings Judy and Peter discover an encha...</td>\n",
  110. " <td>...</td>\n",
  111. " <td>1995-12-15</td>\n",
  112. " <td>262797249.0</td>\n",
  113. " <td>104.0</td>\n",
  114. " <td>[{'iso_639_1': 'en', 'name': 'English'}, {'iso...</td>\n",
  115. " <td>Released</td>\n",
  116. " <td>Roll the dice and unleash the excitement!</td>\n",
  117. " <td>Jumanji</td>\n",
  118. " <td>False</td>\n",
  119. " <td>6.9</td>\n",
  120. " <td>2413.0</td>\n",
  121. " </tr>\n",
  122. " <tr>\n",
  123. " <th>2</th>\n",
  124. " <td>False</td>\n",
  125. " <td>{'id': 119050, 'name': 'Grumpy Old Men Collect...</td>\n",
  126. " <td>0</td>\n",
  127. " <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
  128. " <td>NaN</td>\n",
  129. " <td>15602</td>\n",
  130. " <td>tt0113228</td>\n",
  131. " <td>en</td>\n",
  132. " <td>Grumpier Old Men</td>\n",
  133. " <td>A family wedding reignites the ancient feud be...</td>\n",
  134. " <td>...</td>\n",
  135. " <td>1995-12-22</td>\n",
  136. " <td>0.0</td>\n",
  137. " <td>101.0</td>\n",
  138. " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
  139. " <td>Released</td>\n",
  140. " <td>Still Yelling. Still Fighting. Still Ready for...</td>\n",
  141. " <td>Grumpier Old Men</td>\n",
  142. " <td>False</td>\n",
  143. " <td>6.5</td>\n",
  144. " <td>92.0</td>\n",
  145. " </tr>\n",
  146. " <tr>\n",
  147. " <th>3</th>\n",
  148. " <td>False</td>\n",
  149. " <td>NaN</td>\n",
  150. " <td>16000000</td>\n",
  151. " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
  152. " <td>NaN</td>\n",
  153. " <td>31357</td>\n",
  154. " <td>tt0114885</td>\n",
  155. " <td>en</td>\n",
  156. " <td>Waiting to Exhale</td>\n",
  157. " <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
  158. " <td>...</td>\n",
  159. " <td>1995-12-22</td>\n",
  160. " <td>81452156.0</td>\n",
  161. " <td>127.0</td>\n",
  162. " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
  163. " <td>Released</td>\n",
  164. " <td>Friends are the people who let you be yourself...</td>\n",
  165. " <td>Waiting to Exhale</td>\n",
  166. " <td>False</td>\n",
  167. " <td>6.1</td>\n",
  168. " <td>34.0</td>\n",
  169. " </tr>\n",
  170. " <tr>\n",
  171. " <th>4</th>\n",
  172. " <td>False</td>\n",
  173. " <td>{'id': 96871, 'name': 'Father of the Bride Col...</td>\n",
  174. " <td>0</td>\n",
  175. " <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
  176. " <td>NaN</td>\n",
  177. " <td>11862</td>\n",
  178. " <td>tt0113041</td>\n",
  179. " <td>en</td>\n",
  180. " <td>Father of the Bride Part II</td>\n",
  181. " <td>Just when George Banks has recovered from his ...</td>\n",
  182. " <td>...</td>\n",
  183. " <td>1995-02-10</td>\n",
  184. " <td>76578911.0</td>\n",
  185. " <td>106.0</td>\n",
  186. " <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
  187. " <td>Released</td>\n",
  188. " <td>Just When His World Is Back To Normal... He's ...</td>\n",
  189. " <td>Father of the Bride Part II</td>\n",
  190. " <td>False</td>\n",
  191. " <td>5.7</td>\n",
  192. " <td>173.0</td>\n",
  193. " </tr>\n",
  194. " </tbody>\n",
  195. "</table>\n",
  196. "<p>5 rows × 24 columns</p>\n",
  197. "</div>"
  198. ],
  199. "text/plain": [
  200. " adult belongs_to_collection budget \\\n",
  201. "0 False {'id': 10194, 'name': 'Toy Story Collection', ... 30000000 \n",
  202. "1 False NaN 65000000 \n",
  203. "2 False {'id': 119050, 'name': 'Grumpy Old Men Collect... 0 \n",
  204. "3 False NaN 16000000 \n",
  205. "4 False {'id': 96871, 'name': 'Father of the Bride Col... 0 \n",
  206. "\n",
  207. " genres \\\n",
  208. "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
  209. "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
  210. "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
  211. "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
  212. "4 [{'id': 35, 'name': 'Comedy'}] \n",
  213. "\n",
  214. " homepage id imdb_id original_language \\\n",
  215. "0 http://toystory.disney.com/toy-story 862 tt0114709 en \n",
  216. "1 NaN 8844 tt0113497 en \n",
  217. "2 NaN 15602 tt0113228 en \n",
  218. "3 NaN 31357 tt0114885 en \n",
  219. "4 NaN 11862 tt0113041 en \n",
  220. "\n",
  221. " original_title \\\n",
  222. "0 Toy Story \n",
  223. "1 Jumanji \n",
  224. "2 Grumpier Old Men \n",
  225. "3 Waiting to Exhale \n",
  226. "4 Father of the Bride Part II \n",
  227. "\n",
  228. " overview ... release_date \\\n",
  229. "0 Led by Woody, Andy's toys live happily in his ... ... 1995-10-30 \n",
  230. "1 When siblings Judy and Peter discover an encha... ... 1995-12-15 \n",
  231. "2 A family wedding reignites the ancient feud be... ... 1995-12-22 \n",
  232. "3 Cheated on, mistreated and stepped on, the wom... ... 1995-12-22 \n",
  233. "4 Just when George Banks has recovered from his ... ... 1995-02-10 \n",
  234. "\n",
  235. " revenue runtime spoken_languages \\\n",
  236. "0 373554033.0 81.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
  237. "1 262797249.0 104.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n",
  238. "2 0.0 101.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
  239. "3 81452156.0 127.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
  240. "4 76578911.0 106.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
  241. "\n",
  242. " status tagline \\\n",
  243. "0 Released NaN \n",
  244. "1 Released Roll the dice and unleash the excitement! \n",
  245. "2 Released Still Yelling. Still Fighting. Still Ready for... \n",
  246. "3 Released Friends are the people who let you be yourself... \n",
  247. "4 Released Just When His World Is Back To Normal... He's ... \n",
  248. "\n",
  249. " title video vote_average vote_count \n",
  250. "0 Toy Story False 7.7 5415.0 \n",
  251. "1 Jumanji False 6.9 2413.0 \n",
  252. "2 Grumpier Old Men False 6.5 92.0 \n",
  253. "3 Waiting to Exhale False 6.1 34.0 \n",
  254. "4 Father of the Bride Part II False 5.7 173.0 \n",
  255. "\n",
  256. "[5 rows x 24 columns]"
  257. ]
  258. },
  259. "execution_count": 2,
  260. "metadata": {},
  261. "output_type": "execute_result"
  262. }
  263. ],
  264. "source": [
  265. "#Read the CSV File into df\n",
  266. "df = pd.read_csv('../data/movies_metadata.csv', low_memory=False)\n",
  267. "\n",
  268. "#We will find out what the following code does a little later!\n",
  269. "df.head()"
  270. ]
  271. },
  272. {
  273. "cell_type": "code",
  274. "execution_count": 3,
  275. "metadata": {},
  276. "outputs": [
  277. {
  278. "data": {
  279. "text/plain": [
  280. "pandas.core.frame.DataFrame"
  281. ]
  282. },
  283. "execution_count": 3,
  284. "metadata": {},
  285. "output_type": "execute_result"
  286. }
  287. ],
  288. "source": [
  289. "#Output the type of df\n",
  290. "type(df)"
  291. ]
  292. },
  293. {
  294. "cell_type": "code",
  295. "execution_count": 4,
  296. "metadata": {},
  297. "outputs": [
  298. {
  299. "data": {
  300. "text/plain": [
  301. "(45466, 24)"
  302. ]
  303. },
  304. "execution_count": 4,
  305. "metadata": {},
  306. "output_type": "execute_result"
  307. }
  308. ],
  309. "source": [
  310. "#Output the shape of df\n",
  311. "df.shape"
  312. ]
  313. },
  314. {
  315. "cell_type": "code",
  316. "execution_count": 5,
  317. "metadata": {},
  318. "outputs": [
  319. {
  320. "data": {
  321. "text/plain": [
  322. "Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',\n",
  323. " 'imdb_id', 'original_language', 'original_title', 'overview',\n",
  324. " 'popularity', 'poster_path', 'production_companies',\n",
  325. " 'production_countries', 'release_date', 'revenue', 'runtime',\n",
  326. " 'spoken_languages', 'status', 'tagline', 'title', 'video',\n",
  327. " 'vote_average', 'vote_count'],\n",
  328. " dtype='object')"
  329. ]
  330. },
  331. "execution_count": 5,
  332. "metadata": {},
  333. "output_type": "execute_result"
  334. }
  335. ],
  336. "source": [
  337. "#Output the columns of df\n",
  338. "df.columns"
  339. ]
  340. },
  341. {
  342. "cell_type": "code",
  343. "execution_count": 6,
  344. "metadata": {},
  345. "outputs": [
  346. {
  347. "data": {
  348. "text/plain": [
  349. "adult False\n",
  350. "belongs_to_collection NaN\n",
  351. "budget 65000000\n",
  352. "genres [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...\n",
  353. "homepage NaN\n",
  354. "id 8844\n",
  355. "imdb_id tt0113497\n",
  356. "original_language en\n",
  357. "original_title Jumanji\n",
  358. "overview When siblings Judy and Peter discover an encha...\n",
  359. "popularity 17.015539\n",
  360. "poster_path /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg\n",
  361. "production_companies [{'name': 'TriStar Pictures', 'id': 559}, {'na...\n",
  362. "production_countries [{'iso_3166_1': 'US', 'name': 'United States o...\n",
  363. "release_date 1995-12-15\n",
  364. "revenue 262797249.0\n",
  365. "runtime 104.0\n",
  366. "spoken_languages [{'iso_639_1': 'en', 'name': 'English'}, {'iso...\n",
  367. "status Released\n",
  368. "tagline Roll the dice and unleash the excitement!\n",
  369. "title Jumanji\n",
  370. "video False\n",
  371. "vote_average 6.9\n",
  372. "vote_count 2413.0\n",
  373. "Name: 1, dtype: object"
  374. ]
  375. },
  376. "execution_count": 6,
  377. "metadata": {},
  378. "output_type": "execute_result"
  379. }
  380. ],
  381. "source": [
  382. "#Select the second movie in df\n",
  383. "second = df.iloc[1]\n",
  384. "second"
  385. ]
  386. },
  387. {
  388. "cell_type": "code",
  389. "execution_count": 7,
  390. "metadata": {},
  391. "outputs": [
  392. {
  393. "data": {
  394. "text/plain": [
  395. "adult False\n",
  396. "belongs_to_collection NaN\n",
  397. "budget 65000000\n",
  398. "genres [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...\n",
  399. "homepage NaN\n",
  400. "id 8844\n",
  401. "imdb_id tt0113497\n",
  402. "original_language en\n",
  403. "original_title Jumanji\n",
  404. "overview When siblings Judy and Peter discover an encha...\n",
  405. "popularity 17.015539\n",
  406. "poster_path /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg\n",
  407. "production_companies [{'name': 'TriStar Pictures', 'id': 559}, {'na...\n",
  408. "production_countries [{'iso_3166_1': 'US', 'name': 'United States o...\n",
  409. "release_date 1995-12-15\n",
  410. "revenue 262797249.0\n",
  411. "runtime 104.0\n",
  412. "spoken_languages [{'iso_639_1': 'en', 'name': 'English'}, {'iso...\n",
  413. "status Released\n",
  414. "tagline Roll the dice and unleash the excitement!\n",
  415. "video False\n",
  416. "vote_average 6.9\n",
  417. "vote_count 2413.0\n",
  418. "Name: Jumanji, dtype: object"
  419. ]
  420. },
  421. "execution_count": 7,
  422. "metadata": {},
  423. "output_type": "execute_result"
  424. }
  425. ],
  426. "source": [
  427. "#Change the index to the title\n",
  428. "df = df.set_index('title')\n",
  429. "\n",
  430. "#Access the movie with title 'Jumanji'\n",
  431. "jum = df.loc['Jumanji']\n",
  432. "jum"
  433. ]
  434. },
  435. {
  436. "cell_type": "code",
  437. "execution_count": 8,
  438. "metadata": {},
  439. "outputs": [],
  440. "source": [
  441. "df = df.reset_index()"
  442. ]
  443. },
  444. {
  445. "cell_type": "code",
  446. "execution_count": 9,
  447. "metadata": {},
  448. "outputs": [
  449. {
  450. "data": {
  451. "text/html": [
  452. "<div>\n",
  453. "<style scoped>\n",
  454. " .dataframe tbody tr th:only-of-type {\n",
  455. " vertical-align: middle;\n",
  456. " }\n",
  457. "\n",
  458. " .dataframe tbody tr th {\n",
  459. " vertical-align: top;\n",
  460. " }\n",
  461. "\n",
  462. " .dataframe thead th {\n",
  463. " text-align: right;\n",
  464. " }\n",
  465. "</style>\n",
  466. "<table border=\"1\" class=\"dataframe\">\n",
  467. " <thead>\n",
  468. " <tr style=\"text-align: right;\">\n",
  469. " <th></th>\n",
  470. " <th>title</th>\n",
  471. " <th>release_date</th>\n",
  472. " <th>budget</th>\n",
  473. " <th>revenue</th>\n",
  474. " <th>runtime</th>\n",
  475. " <th>genres</th>\n",
  476. " </tr>\n",
  477. " </thead>\n",
  478. " <tbody>\n",
  479. " <tr>\n",
  480. " <th>0</th>\n",
  481. " <td>Toy Story</td>\n",
  482. " <td>1995-10-30</td>\n",
  483. " <td>30000000</td>\n",
  484. " <td>373554033.0</td>\n",
  485. " <td>81.0</td>\n",
  486. " <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
  487. " </tr>\n",
  488. " <tr>\n",
  489. " <th>1</th>\n",
  490. " <td>Jumanji</td>\n",
  491. " <td>1995-12-15</td>\n",
  492. " <td>65000000</td>\n",
  493. " <td>262797249.0</td>\n",
  494. " <td>104.0</td>\n",
  495. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
  496. " </tr>\n",
  497. " <tr>\n",
  498. " <th>2</th>\n",
  499. " <td>Grumpier Old Men</td>\n",
  500. " <td>1995-12-22</td>\n",
  501. " <td>0</td>\n",
  502. " <td>0.0</td>\n",
  503. " <td>101.0</td>\n",
  504. " <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
  505. " </tr>\n",
  506. " <tr>\n",
  507. " <th>3</th>\n",
  508. " <td>Waiting to Exhale</td>\n",
  509. " <td>1995-12-22</td>\n",
  510. " <td>16000000</td>\n",
  511. " <td>81452156.0</td>\n",
  512. " <td>127.0</td>\n",
  513. " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
  514. " </tr>\n",
  515. " <tr>\n",
  516. " <th>4</th>\n",
  517. " <td>Father of the Bride Part II</td>\n",
  518. " <td>1995-02-10</td>\n",
  519. " <td>0</td>\n",
  520. " <td>76578911.0</td>\n",
  521. " <td>106.0</td>\n",
  522. " <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
  523. " </tr>\n",
  524. " </tbody>\n",
  525. "</table>\n",
  526. "</div>"
  527. ],
  528. "text/plain": [
  529. " title release_date budget revenue runtime \\\n",
  530. "0 Toy Story 1995-10-30 30000000 373554033.0 81.0 \n",
  531. "1 Jumanji 1995-12-15 65000000 262797249.0 104.0 \n",
  532. "2 Grumpier Old Men 1995-12-22 0 0.0 101.0 \n",
  533. "3 Waiting to Exhale 1995-12-22 16000000 81452156.0 127.0 \n",
  534. "4 Father of the Bride Part II 1995-02-10 0 76578911.0 106.0 \n",
  535. "\n",
  536. " genres \n",
  537. "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
  538. "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
  539. "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
  540. "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
  541. "4 [{'id': 35, 'name': 'Comedy'}] "
  542. ]
  543. },
  544. "execution_count": 9,
  545. "metadata": {},
  546. "output_type": "execute_result"
  547. }
  548. ],
  549. "source": [
  550. "#Create a smaller dataframe with a subset of all features\n",
  551. "small_df = df[['title', 'release_date', 'budget', 'revenue', 'runtime', 'genres']]\n",
  552. "\n",
  553. "#Output only the first 5 rows of small_df\n",
  554. "small_df.head()"
  555. ]
  556. },
  557. {
  558. "cell_type": "code",
  559. "execution_count": 10,
  560. "metadata": {},
  561. "outputs": [
  562. {
  563. "data": {
  564. "text/html": [
  565. "<div>\n",
  566. "<style scoped>\n",
  567. " .dataframe tbody tr th:only-of-type {\n",
  568. " vertical-align: middle;\n",
  569. " }\n",
  570. "\n",
  571. " .dataframe tbody tr th {\n",
  572. " vertical-align: top;\n",
  573. " }\n",
  574. "\n",
  575. " .dataframe thead th {\n",
  576. " text-align: right;\n",
  577. " }\n",
  578. "</style>\n",
  579. "<table border=\"1\" class=\"dataframe\">\n",
  580. " <thead>\n",
  581. " <tr style=\"text-align: right;\">\n",
  582. " <th></th>\n",
  583. " <th>title</th>\n",
  584. " <th>release_date</th>\n",
  585. " <th>budget</th>\n",
  586. " <th>revenue</th>\n",
  587. " <th>runtime</th>\n",
  588. " <th>genres</th>\n",
  589. " </tr>\n",
  590. " </thead>\n",
  591. " <tbody>\n",
  592. " <tr>\n",
  593. " <th>0</th>\n",
  594. " <td>Toy Story</td>\n",
  595. " <td>1995-10-30</td>\n",
  596. " <td>30000000</td>\n",
  597. " <td>373554033.0</td>\n",
  598. " <td>81.0</td>\n",
  599. " <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
  600. " </tr>\n",
  601. " <tr>\n",
  602. " <th>1</th>\n",
  603. " <td>Jumanji</td>\n",
  604. " <td>1995-12-15</td>\n",
  605. " <td>65000000</td>\n",
  606. " <td>262797249.0</td>\n",
  607. " <td>104.0</td>\n",
  608. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
  609. " </tr>\n",
  610. " <tr>\n",
  611. " <th>2</th>\n",
  612. " <td>Grumpier Old Men</td>\n",
  613. " <td>1995-12-22</td>\n",
  614. " <td>0</td>\n",
  615. " <td>0.0</td>\n",
  616. " <td>101.0</td>\n",
  617. " <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
  618. " </tr>\n",
  619. " <tr>\n",
  620. " <th>3</th>\n",
  621. " <td>Waiting to Exhale</td>\n",
  622. " <td>1995-12-22</td>\n",
  623. " <td>16000000</td>\n",
  624. " <td>81452156.0</td>\n",
  625. " <td>127.0</td>\n",
  626. " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
  627. " </tr>\n",
  628. " <tr>\n",
  629. " <th>4</th>\n",
  630. " <td>Father of the Bride Part II</td>\n",
  631. " <td>1995-02-10</td>\n",
  632. " <td>0</td>\n",
  633. " <td>76578911.0</td>\n",
  634. " <td>106.0</td>\n",
  635. " <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
  636. " </tr>\n",
  637. " <tr>\n",
  638. " <th>5</th>\n",
  639. " <td>Heat</td>\n",
  640. " <td>1995-12-15</td>\n",
  641. " <td>60000000</td>\n",
  642. " <td>187436818.0</td>\n",
  643. " <td>170.0</td>\n",
  644. " <td>[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...</td>\n",
  645. " </tr>\n",
  646. " <tr>\n",
  647. " <th>6</th>\n",
  648. " <td>Sabrina</td>\n",
  649. " <td>1995-12-15</td>\n",
  650. " <td>58000000</td>\n",
  651. " <td>0.0</td>\n",
  652. " <td>127.0</td>\n",
  653. " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...</td>\n",
  654. " </tr>\n",
  655. " <tr>\n",
  656. " <th>7</th>\n",
  657. " <td>Tom and Huck</td>\n",
  658. " <td>1995-12-22</td>\n",
  659. " <td>0</td>\n",
  660. " <td>0.0</td>\n",
  661. " <td>97.0</td>\n",
  662. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  663. " </tr>\n",
  664. " <tr>\n",
  665. " <th>8</th>\n",
  666. " <td>Sudden Death</td>\n",
  667. " <td>1995-12-22</td>\n",
  668. " <td>35000000</td>\n",
  669. " <td>64350171.0</td>\n",
  670. " <td>106.0</td>\n",
  671. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  672. " </tr>\n",
  673. " <tr>\n",
  674. " <th>9</th>\n",
  675. " <td>GoldenEye</td>\n",
  676. " <td>1995-11-16</td>\n",
  677. " <td>58000000</td>\n",
  678. " <td>352194034.0</td>\n",
  679. " <td>130.0</td>\n",
  680. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...</td>\n",
  681. " </tr>\n",
  682. " <tr>\n",
  683. " <th>10</th>\n",
  684. " <td>The American President</td>\n",
  685. " <td>1995-11-17</td>\n",
  686. " <td>62000000</td>\n",
  687. " <td>107879496.0</td>\n",
  688. " <td>106.0</td>\n",
  689. " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
  690. " </tr>\n",
  691. " <tr>\n",
  692. " <th>11</th>\n",
  693. " <td>Dracula: Dead and Loving It</td>\n",
  694. " <td>1995-12-22</td>\n",
  695. " <td>0</td>\n",
  696. " <td>0.0</td>\n",
  697. " <td>88.0</td>\n",
  698. " <td>[{'id': 35, 'name': 'Comedy'}, {'id': 27, 'nam...</td>\n",
  699. " </tr>\n",
  700. " <tr>\n",
  701. " <th>12</th>\n",
  702. " <td>Balto</td>\n",
  703. " <td>1995-12-22</td>\n",
  704. " <td>0</td>\n",
  705. " <td>11348324.0</td>\n",
  706. " <td>78.0</td>\n",
  707. " <td>[{'id': 10751, 'name': 'Family'}, {'id': 16, '...</td>\n",
  708. " </tr>\n",
  709. " <tr>\n",
  710. " <th>13</th>\n",
  711. " <td>Nixon</td>\n",
  712. " <td>1995-12-22</td>\n",
  713. " <td>44000000</td>\n",
  714. " <td>13681765.0</td>\n",
  715. " <td>192.0</td>\n",
  716. " <td>[{'id': 36, 'name': 'History'}, {'id': 18, 'na...</td>\n",
  717. " </tr>\n",
  718. " <tr>\n",
  719. " <th>14</th>\n",
  720. " <td>Cutthroat Island</td>\n",
  721. " <td>1995-12-22</td>\n",
  722. " <td>98000000</td>\n",
  723. " <td>10017322.0</td>\n",
  724. " <td>119.0</td>\n",
  725. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  726. " </tr>\n",
  727. " </tbody>\n",
  728. "</table>\n",
  729. "</div>"
  730. ],
  731. "text/plain": [
  732. " title release_date budget revenue runtime \\\n",
  733. "0 Toy Story 1995-10-30 30000000 373554033.0 81.0 \n",
  734. "1 Jumanji 1995-12-15 65000000 262797249.0 104.0 \n",
  735. "2 Grumpier Old Men 1995-12-22 0 0.0 101.0 \n",
  736. "3 Waiting to Exhale 1995-12-22 16000000 81452156.0 127.0 \n",
  737. "4 Father of the Bride Part II 1995-02-10 0 76578911.0 106.0 \n",
  738. "5 Heat 1995-12-15 60000000 187436818.0 170.0 \n",
  739. "6 Sabrina 1995-12-15 58000000 0.0 127.0 \n",
  740. "7 Tom and Huck 1995-12-22 0 0.0 97.0 \n",
  741. "8 Sudden Death 1995-12-22 35000000 64350171.0 106.0 \n",
  742. "9 GoldenEye 1995-11-16 58000000 352194034.0 130.0 \n",
  743. "10 The American President 1995-11-17 62000000 107879496.0 106.0 \n",
  744. "11 Dracula: Dead and Loving It 1995-12-22 0 0.0 88.0 \n",
  745. "12 Balto 1995-12-22 0 11348324.0 78.0 \n",
  746. "13 Nixon 1995-12-22 44000000 13681765.0 192.0 \n",
  747. "14 Cutthroat Island 1995-12-22 98000000 10017322.0 119.0 \n",
  748. "\n",
  749. " genres \n",
  750. "0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
  751. "1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
  752. "2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
  753. "3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
  754. "4 [{'id': 35, 'name': 'Comedy'}] \n",
  755. "5 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... \n",
  756. "6 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '... \n",
  757. "7 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n",
  758. "8 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n",
  759. "9 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n",
  760. "10 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
  761. "11 [{'id': 35, 'name': 'Comedy'}, {'id': 27, 'nam... \n",
  762. "12 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... \n",
  763. "13 [{'id': 36, 'name': 'History'}, {'id': 18, 'na... \n",
  764. "14 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... "
  765. ]
  766. },
  767. "execution_count": 10,
  768. "metadata": {},
  769. "output_type": "execute_result"
  770. }
  771. ],
  772. "source": [
  773. "#Display the first 15 rows\n",
  774. "small_df.head(15)"
  775. ]
  776. },
  777. {
  778. "cell_type": "code",
  779. "execution_count": 11,
  780. "metadata": {},
  781. "outputs": [
  782. {
  783. "name": "stdout",
  784. "output_type": "stream",
  785. "text": [
  786. "<class 'pandas.core.frame.DataFrame'>\n",
  787. "RangeIndex: 45466 entries, 0 to 45465\n",
  788. "Data columns (total 6 columns):\n",
  789. " # Column Non-Null Count Dtype \n",
  790. "--- ------ -------------- ----- \n",
  791. " 0 title 45460 non-null object \n",
  792. " 1 release_date 45379 non-null object \n",
  793. " 2 budget 45466 non-null object \n",
  794. " 3 revenue 45460 non-null float64\n",
  795. " 4 runtime 45203 non-null float64\n",
  796. " 5 genres 45466 non-null object \n",
  797. "dtypes: float64(2), object(4)\n",
  798. "memory usage: 2.1+ MB\n"
  799. ]
  800. }
  801. ],
  802. "source": [
  803. "#Get information of the data types of each feature\n",
  804. "small_df.info()"
  805. ]
  806. },
  807. {
  808. "cell_type": "code",
  809. "execution_count": 13,
  810. "metadata": {},
  811. "outputs": [
  812. {
  813. "ename": "ValueError",
  814. "evalue": "could not convert string to float: '/ff9qCepilowshEtG2GYWwzt2bs4.jpg'",
  815. "output_type": "error",
  816. "traceback": [
  817. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  818. "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
  819. "Input \u001b[0;32mIn [13]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbudget\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfloat\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
  820. "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/generic.py:5912\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 5905\u001b[0m results \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 5906\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[:, i]\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 5907\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[1;32m 5908\u001b[0m ]\n\u001b[1;32m 5910\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 5911\u001b[0m \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[0;32m-> 5912\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5913\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor(new_data)\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5915\u001b[0m \u001b[38;5;66;03m# GH 33113: handle empty frame or series\u001b[39;00m\n",
  821. "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/managers.py:419\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mastype\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, dtype, copy: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, errors: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[0;32m--> 419\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n",
  822. "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/managers.py:304\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[0;34m(self, f, align_keys, ignore_failures, **kwargs)\u001b[0m\n\u001b[1;32m 302\u001b[0m applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 304\u001b[0m applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mNotImplementedError\u001b[39;00m):\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ignore_failures:\n",
  823. "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/internals/blocks.py:580\u001b[0m, in \u001b[0;36mBlock.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 563\u001b[0m \u001b[38;5;124;03mCoerce to the new dtype.\u001b[39;00m\n\u001b[1;32m 564\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 576\u001b[0m \u001b[38;5;124;03mBlock\u001b[39;00m\n\u001b[1;32m 577\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 578\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m--> 580\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 582\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[1;32m 583\u001b[0m newb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmake_block(new_values)\n",
  824. "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1292\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 1289\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtype\u001b[38;5;241m.\u001b[39mnumpy_dtype\n\u001b[1;32m 1291\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1292\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# e.g. astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# trying to convert to float\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
  825. "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1237\u001b[0m, in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 1234\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1237\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n",
  826. "File \u001b[0;32m/usr/local/Cellar/ipython/8.4.0/libexec/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1181\u001b[0m, in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 1177\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 1179\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;129;01mor\u001b[39;00m is_object_dtype(arr\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mor\u001b[39;00m is_object_dtype(dtype):\n\u001b[1;32m 1180\u001b[0m \u001b[38;5;66;03m# Explicit copy, or required since NumPy can't view from / to object.\u001b[39;00m\n\u001b[0;32m-> 1181\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n",
  827. "\u001b[0;31mValueError\u001b[0m: could not convert string to float: '/ff9qCepilowshEtG2GYWwzt2bs4.jpg'"
  828. ]
  829. }
  830. ],
  831. "source": [
  832. "df['budget'].astype('float')"
  833. ]
  834. },
  835. {
  836. "cell_type": "code",
  837. "execution_count": 14,
  838. "metadata": {},
  839. "outputs": [
  840. {
  841. "name": "stdout",
  842. "output_type": "stream",
  843. "text": [
  844. "<class 'pandas.core.frame.DataFrame'>\n",
  845. "RangeIndex: 45466 entries, 0 to 45465\n",
  846. "Data columns (total 6 columns):\n",
  847. " # Column Non-Null Count Dtype \n",
  848. "--- ------ -------------- ----- \n",
  849. " 0 title 45460 non-null object \n",
  850. " 1 release_date 45379 non-null object \n",
  851. " 2 budget 45463 non-null float64\n",
  852. " 3 revenue 45460 non-null float64\n",
  853. " 4 runtime 45203 non-null float64\n",
  854. " 5 genres 45466 non-null object \n",
  855. "dtypes: float64(3), object(3)\n",
  856. "memory usage: 2.1+ MB\n"
  857. ]
  858. },
  859. {
  860. "name": "stderr",
  861. "output_type": "stream",
  862. "text": [
  863. "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/1765380320.py:13: SettingWithCopyWarning: \n",
  864. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  865. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  866. "\n",
  867. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  868. " small_df['budget'] = small_df['budget'].apply(to_float)\n",
  869. "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/1765380320.py:16: SettingWithCopyWarning: \n",
  870. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  871. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  872. "\n",
  873. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  874. " small_df['budget'] = small_df['budget'].astype('float')\n"
  875. ]
  876. }
  877. ],
  878. "source": [
  879. "#Import the numpy library\n",
  880. "import numpy as np\n",
  881. "\n",
  882. "#Function to convert to float manually; values that float() cannot parse become NaN\n",
  883. "def to_float(x):\n",
  884. "    try:\n",
  885. "        x = float(x)\n",
  886. "    except (TypeError, ValueError):\n",
  887. "        x = np.nan\n",
  888. "    return x\n",
  889. "\n",
  890. "#Apply the to_float function to all values in the budget column\n",
  891. "small_df['budget'] = small_df['budget'].apply(to_float)\n",
  892. "\n",
  893. "#Try converting to float using pandas astype\n",
  894. "small_df['budget'] = small_df['budget'].astype('float')\n",
  895. "\n",
  896. "#Get the data types for all features\n",
  897. "small_df.info()"
  898. ]
  899. },
  900. {
  901. "cell_type": "code",
  902. "execution_count": 15,
  903. "metadata": {},
  904. "outputs": [
  905. {
  906. "name": "stderr",
  907. "output_type": "stream",
  908. "text": [
  909. "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/2397457688.py:2: SettingWithCopyWarning: \n",
  910. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  911. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  912. "\n",
  913. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  914. " small_df['release_date'] = pd.to_datetime(small_df['release_date'], errors='coerce')\n",
  915. "/var/folders/dv/107570dd01b_m3wvmvv0g9lm0000gn/T/ipykernel_4472/2397457688.py:5: SettingWithCopyWarning: \n",
  916. "A value is trying to be set on a copy of a slice from a DataFrame.\n",
  917. "Try using .loc[row_indexer,col_indexer] = value instead\n",
  918. "\n",
  919. "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
  920. " small_df['year'] = small_df['release_date'].apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)\n"
  921. ]
  922. }
  923. ],
  924. "source": [
  925. "#Convert release_date into pandas datetime format (unparseable dates are coerced to NaT)\n",
  926. "small_df['release_date'] = pd.to_datetime(small_df['release_date'], errors='coerce')\n",
  927. "\n",
  928. "#Extract year as a string; NaN never compares equal to anything, so missing dates are detected with pd.notna\n",
  929. "small_df['year'] = small_df['release_date'].apply(lambda x: str(x).split('-')[0] if pd.notna(x) else np.nan)"
  930. ]
  931. },
  932. {
  933. "cell_type": "code",
  934. "execution_count": 16,
  935. "metadata": {},
  936. "outputs": [
  937. {
  938. "data": {
  939. "text/html": [
  940. "<div>\n",
  941. "<style scoped>\n",
  942. " .dataframe tbody tr th:only-of-type {\n",
  943. " vertical-align: middle;\n",
  944. " }\n",
  945. "\n",
  946. " .dataframe tbody tr th {\n",
  947. " vertical-align: top;\n",
  948. " }\n",
  949. "\n",
  950. " .dataframe thead th {\n",
  951. " text-align: right;\n",
  952. " }\n",
  953. "</style>\n",
  954. "<table border=\"1\" class=\"dataframe\">\n",
  955. " <thead>\n",
  956. " <tr style=\"text-align: right;\">\n",
  957. " <th></th>\n",
  958. " <th>title</th>\n",
  959. " <th>release_date</th>\n",
  960. " <th>budget</th>\n",
  961. " <th>revenue</th>\n",
  962. " <th>runtime</th>\n",
  963. " <th>genres</th>\n",
  964. " <th>year</th>\n",
  965. " </tr>\n",
  966. " </thead>\n",
  967. " <tbody>\n",
  968. " <tr>\n",
  969. " <th>34940</th>\n",
  970. " <td>Passage of Venus</td>\n",
  971. " <td>1874-12-09</td>\n",
  972. " <td>0.0</td>\n",
  973. " <td>0.0</td>\n",
  974. " <td>1.0</td>\n",
  975. " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
  976. " <td>1874</td>\n",
  977. " </tr>\n",
  978. " <tr>\n",
  979. " <th>34937</th>\n",
  980. " <td>Sallie Gardner at a Gallop</td>\n",
  981. " <td>1878-06-14</td>\n",
  982. " <td>0.0</td>\n",
  983. " <td>0.0</td>\n",
  984. " <td>1.0</td>\n",
  985. " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
  986. " <td>1878</td>\n",
  987. " </tr>\n",
  988. " <tr>\n",
  989. " <th>41602</th>\n",
  990. " <td>Buffalo Running</td>\n",
  991. " <td>1883-11-19</td>\n",
  992. " <td>0.0</td>\n",
  993. " <td>0.0</td>\n",
  994. " <td>1.0</td>\n",
  995. " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
  996. " <td>1883</td>\n",
  997. " </tr>\n",
  998. " <tr>\n",
  999. " <th>34933</th>\n",
  1000. " <td>Man Walking Around a Corner</td>\n",
  1001. " <td>1887-08-18</td>\n",
  1002. " <td>0.0</td>\n",
  1003. " <td>0.0</td>\n",
  1004. " <td>1.0</td>\n",
  1005. " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
  1006. " <td>1887</td>\n",
  1007. " </tr>\n",
  1008. " <tr>\n",
  1009. " <th>34938</th>\n",
  1010. " <td>Traffic Crossing Leeds Bridge</td>\n",
  1011. " <td>1888-10-15</td>\n",
  1012. " <td>0.0</td>\n",
  1013. " <td>0.0</td>\n",
  1014. " <td>1.0</td>\n",
  1015. " <td>[{'id': 99, 'name': 'Documentary'}]</td>\n",
  1016. " <td>1888</td>\n",
  1017. " </tr>\n",
  1018. " </tbody>\n",
  1019. "</table>\n",
  1020. "</div>"
  1021. ],
  1022. "text/plain": [
  1023. " title release_date budget revenue runtime \\\n",
  1024. "34940 Passage of Venus 1874-12-09 0.0 0.0 1.0 \n",
  1025. "34937 Sallie Gardner at a Gallop 1878-06-14 0.0 0.0 1.0 \n",
  1026. "41602 Buffalo Running 1883-11-19 0.0 0.0 1.0 \n",
  1027. "34933 Man Walking Around a Corner 1887-08-18 0.0 0.0 1.0 \n",
  1028. "34938 Traffic Crossing Leeds Bridge 1888-10-15 0.0 0.0 1.0 \n",
  1029. "\n",
  1030. " genres year \n",
  1031. "34940 [{'id': 99, 'name': 'Documentary'}] 1874 \n",
  1032. "34937 [{'id': 99, 'name': 'Documentary'}] 1878 \n",
  1033. "41602 [{'id': 99, 'name': 'Documentary'}] 1883 \n",
  1034. "34933 [{'id': 99, 'name': 'Documentary'}] 1887 \n",
  1035. "34938 [{'id': 99, 'name': 'Documentary'}] 1888 "
  1036. ]
  1037. },
  1038. "execution_count": 16,
  1039. "metadata": {},
  1040. "output_type": "execute_result"
  1041. }
  1042. ],
  1043. "source": [
  1044. "#Sort DataFrame based on release year\n",
  1045. "small_df = small_df.sort_values('year')\n",
  1046. "\n",
  1047. "small_df.head()"
  1048. ]
  1049. },
  1050. {
  1051. "cell_type": "code",
  1052. "execution_count": 17,
  1053. "metadata": {},
  1054. "outputs": [
  1055. {
  1056. "data": {
  1057. "text/html": [
  1058. "<div>\n",
  1059. "<style scoped>\n",
  1060. " .dataframe tbody tr th:only-of-type {\n",
  1061. " vertical-align: middle;\n",
  1062. " }\n",
  1063. "\n",
  1064. " .dataframe tbody tr th {\n",
  1065. " vertical-align: top;\n",
  1066. " }\n",
  1067. "\n",
  1068. " .dataframe thead th {\n",
  1069. " text-align: right;\n",
  1070. " }\n",
  1071. "</style>\n",
  1072. "<table border=\"1\" class=\"dataframe\">\n",
  1073. " <thead>\n",
  1074. " <tr style=\"text-align: right;\">\n",
  1075. " <th></th>\n",
  1076. " <th>title</th>\n",
  1077. " <th>release_date</th>\n",
  1078. " <th>budget</th>\n",
  1079. " <th>revenue</th>\n",
  1080. " <th>runtime</th>\n",
  1081. " <th>genres</th>\n",
  1082. " <th>year</th>\n",
  1083. " </tr>\n",
  1084. " </thead>\n",
  1085. " <tbody>\n",
  1086. " <tr>\n",
  1087. " <th>14551</th>\n",
  1088. " <td>Avatar</td>\n",
  1089. " <td>2009-12-10</td>\n",
  1090. " <td>237000000.0</td>\n",
  1091. " <td>2.787965e+09</td>\n",
  1092. " <td>162.0</td>\n",
  1093. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1094. " <td>2009</td>\n",
  1095. " </tr>\n",
  1096. " <tr>\n",
  1097. " <th>26555</th>\n",
  1098. " <td>Star Wars: The Force Awakens</td>\n",
  1099. " <td>2015-12-15</td>\n",
  1100. " <td>245000000.0</td>\n",
  1101. " <td>2.068224e+09</td>\n",
  1102. " <td>136.0</td>\n",
  1103. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1104. " <td>2015</td>\n",
  1105. " </tr>\n",
  1106. " <tr>\n",
  1107. " <th>1639</th>\n",
  1108. " <td>Titanic</td>\n",
  1109. " <td>1997-11-18</td>\n",
  1110. " <td>200000000.0</td>\n",
  1111. " <td>1.845034e+09</td>\n",
  1112. " <td>194.0</td>\n",
  1113. " <td>[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...</td>\n",
  1114. " <td>1997</td>\n",
  1115. " </tr>\n",
  1116. " <tr>\n",
  1117. " <th>17818</th>\n",
  1118. " <td>The Avengers</td>\n",
  1119. " <td>2012-04-25</td>\n",
  1120. " <td>220000000.0</td>\n",
  1121. " <td>1.519558e+09</td>\n",
  1122. " <td>143.0</td>\n",
  1123. " <td>[{'id': 878, 'name': 'Science Fiction'}, {'id'...</td>\n",
  1124. " <td>2012</td>\n",
  1125. " </tr>\n",
  1126. " <tr>\n",
  1127. " <th>25084</th>\n",
  1128. " <td>Jurassic World</td>\n",
  1129. " <td>2015-06-09</td>\n",
  1130. " <td>150000000.0</td>\n",
  1131. " <td>1.513529e+09</td>\n",
  1132. " <td>124.0</td>\n",
  1133. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1134. " <td>2015</td>\n",
  1135. " </tr>\n",
  1136. " </tbody>\n",
  1137. "</table>\n",
  1138. "</div>"
  1139. ],
  1140. "text/plain": [
  1141. " title release_date budget revenue \\\n",
  1142. "14551 Avatar 2009-12-10 237000000.0 2.787965e+09 \n",
  1143. "26555 Star Wars: The Force Awakens 2015-12-15 245000000.0 2.068224e+09 \n",
  1144. "1639 Titanic 1997-11-18 200000000.0 1.845034e+09 \n",
  1145. "17818 The Avengers 2012-04-25 220000000.0 1.519558e+09 \n",
  1146. "25084 Jurassic World 2015-06-09 150000000.0 1.513529e+09 \n",
  1147. "\n",
  1148. " runtime genres year \n",
  1149. "14551 162.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2009 \n",
  1150. "26555 136.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
  1151. "1639 194.0 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... 1997 \n",
  1152. "17818 143.0 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2012 \n",
  1153. "25084 124.0 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 "
  1154. ]
  1155. },
  1156. "execution_count": 17,
  1157. "metadata": {},
  1158. "output_type": "execute_result"
  1159. }
  1160. ],
  1161. "source": [
  1162. "#Sort Movies based on revenue (in descending order)\n",
  1163. "small_df = small_df.sort_values('revenue', ascending=False)\n",
  1164. "\n",
  1165. "small_df.head()"
  1166. ]
  1167. },
  1168. {
  1169. "cell_type": "code",
  1170. "execution_count": 18,
  1171. "metadata": {},
  1172. "outputs": [
  1173. {
  1174. "data": {
  1175. "text/html": [
  1176. "<div>\n",
  1177. "<style scoped>\n",
  1178. " .dataframe tbody tr th:only-of-type {\n",
  1179. " vertical-align: middle;\n",
  1180. " }\n",
  1181. "\n",
  1182. " .dataframe tbody tr th {\n",
  1183. " vertical-align: top;\n",
  1184. " }\n",
  1185. "\n",
  1186. " .dataframe thead th {\n",
  1187. " text-align: right;\n",
  1188. " }\n",
  1189. "</style>\n",
  1190. "<table border=\"1\" class=\"dataframe\">\n",
  1191. " <thead>\n",
  1192. " <tr style=\"text-align: right;\">\n",
  1193. " <th></th>\n",
  1194. " <th>title</th>\n",
  1195. " <th>release_date</th>\n",
  1196. " <th>budget</th>\n",
  1197. " <th>revenue</th>\n",
  1198. " <th>runtime</th>\n",
  1199. " <th>genres</th>\n",
  1200. " <th>year</th>\n",
  1201. " </tr>\n",
  1202. " </thead>\n",
  1203. " <tbody>\n",
  1204. " <tr>\n",
  1205. " <th>14551</th>\n",
  1206. " <td>Avatar</td>\n",
  1207. " <td>2009-12-10</td>\n",
  1208. " <td>237000000.0</td>\n",
  1209. " <td>2.787965e+09</td>\n",
  1210. " <td>162.0</td>\n",
  1211. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1212. " <td>2009</td>\n",
  1213. " </tr>\n",
  1214. " <tr>\n",
  1215. " <th>26555</th>\n",
  1216. " <td>Star Wars: The Force Awakens</td>\n",
  1217. " <td>2015-12-15</td>\n",
  1218. " <td>245000000.0</td>\n",
  1219. " <td>2.068224e+09</td>\n",
  1220. " <td>136.0</td>\n",
  1221. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1222. " <td>2015</td>\n",
  1223. " </tr>\n",
  1224. " <tr>\n",
  1225. " <th>1639</th>\n",
  1226. " <td>Titanic</td>\n",
  1227. " <td>1997-11-18</td>\n",
  1228. " <td>200000000.0</td>\n",
  1229. " <td>1.845034e+09</td>\n",
  1230. " <td>194.0</td>\n",
  1231. " <td>[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...</td>\n",
  1232. " <td>1997</td>\n",
  1233. " </tr>\n",
  1234. " <tr>\n",
  1235. " <th>17818</th>\n",
  1236. " <td>The Avengers</td>\n",
  1237. " <td>2012-04-25</td>\n",
  1238. " <td>220000000.0</td>\n",
  1239. " <td>1.519558e+09</td>\n",
  1240. " <td>143.0</td>\n",
  1241. " <td>[{'id': 878, 'name': 'Science Fiction'}, {'id'...</td>\n",
  1242. " <td>2012</td>\n",
  1243. " </tr>\n",
  1244. " <tr>\n",
  1245. " <th>25084</th>\n",
  1246. " <td>Jurassic World</td>\n",
  1247. " <td>2015-06-09</td>\n",
  1248. " <td>150000000.0</td>\n",
  1249. " <td>1.513529e+09</td>\n",
  1250. " <td>124.0</td>\n",
  1251. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1252. " <td>2015</td>\n",
  1253. " </tr>\n",
  1254. " <tr>\n",
  1255. " <th>28830</th>\n",
  1256. " <td>Furious 7</td>\n",
  1257. " <td>2015-04-01</td>\n",
  1258. " <td>190000000.0</td>\n",
  1259. " <td>1.506249e+09</td>\n",
  1260. " <td>137.0</td>\n",
  1261. " <td>[{'id': 28, 'name': 'Action'}]</td>\n",
  1262. " <td>2015</td>\n",
  1263. " </tr>\n",
  1264. " <tr>\n",
  1265. " <th>26558</th>\n",
  1266. " <td>Avengers: Age of Ultron</td>\n",
  1267. " <td>2015-04-22</td>\n",
  1268. " <td>280000000.0</td>\n",
  1269. " <td>1.405404e+09</td>\n",
  1270. " <td>141.0</td>\n",
  1271. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1272. " <td>2015</td>\n",
  1273. " </tr>\n",
  1274. " <tr>\n",
  1275. " <th>17437</th>\n",
  1276. " <td>Harry Potter and the Deathly Hallows: Part 2</td>\n",
  1277. " <td>2011-07-07</td>\n",
  1278. " <td>125000000.0</td>\n",
  1279. " <td>1.342000e+09</td>\n",
  1280. " <td>130.0</td>\n",
  1281. " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
  1282. " <td>2011</td>\n",
  1283. " </tr>\n",
  1284. " <tr>\n",
  1285. " <th>22110</th>\n",
  1286. " <td>Frozen</td>\n",
  1287. " <td>2013-11-27</td>\n",
  1288. " <td>150000000.0</td>\n",
  1289. " <td>1.274219e+09</td>\n",
  1290. " <td>102.0</td>\n",
  1291. " <td>[{'id': 16, 'name': 'Animation'}, {'id': 12, '...</td>\n",
  1292. " <td>2013</td>\n",
  1293. " </tr>\n",
  1294. " <tr>\n",
  1295. " <th>42222</th>\n",
  1296. " <td>Beauty and the Beast</td>\n",
  1297. " <td>2017-03-16</td>\n",
  1298. " <td>160000000.0</td>\n",
  1299. " <td>1.262886e+09</td>\n",
  1300. " <td>129.0</td>\n",
  1301. " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
  1302. " <td>2017</td>\n",
  1303. " </tr>\n",
  1304. " <tr>\n",
  1305. " <th>43255</th>\n",
  1306. " <td>The Fate of the Furious</td>\n",
  1307. " <td>2017-04-12</td>\n",
  1308. " <td>250000000.0</td>\n",
  1309. " <td>1.238765e+09</td>\n",
  1310. " <td>136.0</td>\n",
  1311. " <td>[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...</td>\n",
  1312. " <td>2017</td>\n",
  1313. " </tr>\n",
  1314. " <tr>\n",
  1315. " <th>20830</th>\n",
  1316. " <td>Iron Man 3</td>\n",
  1317. " <td>2013-04-18</td>\n",
  1318. " <td>200000000.0</td>\n",
  1319. " <td>1.215440e+09</td>\n",
  1320. " <td>130.0</td>\n",
  1321. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1322. " <td>2013</td>\n",
  1323. " </tr>\n",
  1324. " <tr>\n",
  1325. " <th>30700</th>\n",
  1326. " <td>Minions</td>\n",
  1327. " <td>2015-06-17</td>\n",
  1328. " <td>74000000.0</td>\n",
  1329. " <td>1.156731e+09</td>\n",
  1330. " <td>91.0</td>\n",
  1331. " <td>[{'id': 10751, 'name': 'Family'}, {'id': 16, '...</td>\n",
  1332. " <td>2015</td>\n",
  1333. " </tr>\n",
  1334. " <tr>\n",
  1335. " <th>26567</th>\n",
  1336. " <td>Captain America: Civil War</td>\n",
  1337. " <td>2016-04-27</td>\n",
  1338. " <td>250000000.0</td>\n",
  1339. " <td>1.153304e+09</td>\n",
  1340. " <td>147.0</td>\n",
  1341. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...</td>\n",
  1342. " <td>2016</td>\n",
  1343. " </tr>\n",
  1344. " <tr>\n",
  1345. " <th>17293</th>\n",
  1346. " <td>Transformers: Dark of the Moon</td>\n",
  1347. " <td>2011-06-28</td>\n",
  1348. " <td>195000000.0</td>\n",
  1349. " <td>1.123747e+09</td>\n",
  1350. " <td>154.0</td>\n",
  1351. " <td>[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...</td>\n",
  1352. " <td>2011</td>\n",
  1353. " </tr>\n",
  1354. " <tr>\n",
  1355. " <th>7000</th>\n",
  1356. " <td>The Lord of the Rings: The Return of the King</td>\n",
  1357. " <td>2003-12-01</td>\n",
  1358. " <td>94000000.0</td>\n",
  1359. " <td>1.118889e+09</td>\n",
  1360. " <td>201.0</td>\n",
  1361. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
  1362. " <td>2003</td>\n",
  1363. " </tr>\n",
  1364. " <tr>\n",
  1365. " <th>19261</th>\n",
  1366. " <td>Skyfall</td>\n",
  1367. " <td>2012-10-25</td>\n",
  1368. " <td>200000000.0</td>\n",
  1369. " <td>1.108561e+09</td>\n",
  1370. " <td>143.0</td>\n",
  1371. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1372. " <td>2012</td>\n",
  1373. " </tr>\n",
  1374. " <tr>\n",
  1375. " <th>23617</th>\n",
  1376. " <td>Transformers: Age of Extinction</td>\n",
  1377. " <td>2014-06-25</td>\n",
  1378. " <td>210000000.0</td>\n",
  1379. " <td>1.091405e+09</td>\n",
  1380. " <td>165.0</td>\n",
  1381. " <td>[{'id': 878, 'name': 'Science Fiction'}, {'id'...</td>\n",
  1382. " <td>2014</td>\n",
  1383. " </tr>\n",
  1384. " <tr>\n",
  1385. " <th>18252</th>\n",
  1386. " <td>The Dark Knight Rises</td>\n",
  1387. " <td>2012-07-16</td>\n",
  1388. " <td>250000000.0</td>\n",
  1389. " <td>1.084939e+09</td>\n",
  1390. " <td>165.0</td>\n",
  1391. " <td>[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...</td>\n",
  1392. " <td>2012</td>\n",
  1393. " </tr>\n",
  1394. " <tr>\n",
  1395. " <th>15348</th>\n",
  1396. " <td>Toy Story 3</td>\n",
  1397. " <td>2010-06-16</td>\n",
  1398. " <td>200000000.0</td>\n",
  1399. " <td>1.066970e+09</td>\n",
  1400. " <td>103.0</td>\n",
  1401. " <td>[{'id': 16, 'name': 'Animation'}, {'id': 10751...</td>\n",
  1402. " <td>2010</td>\n",
  1403. " </tr>\n",
  1404. " <tr>\n",
  1405. " <th>11008</th>\n",
  1406. " <td>Pirates of the Caribbean: Dead Man's Chest</td>\n",
  1407. " <td>2006-06-20</td>\n",
  1408. " <td>200000000.0</td>\n",
  1409. " <td>1.065660e+09</td>\n",
  1410. " <td>151.0</td>\n",
  1411. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
  1412. " <td>2006</td>\n",
  1413. " </tr>\n",
  1414. " <tr>\n",
  1415. " <th>41489</th>\n",
  1416. " <td>Rogue One: A Star Wars Story</td>\n",
  1417. " <td>2016-12-14</td>\n",
  1418. " <td>200000000.0</td>\n",
  1419. " <td>1.056057e+09</td>\n",
  1420. " <td>133.0</td>\n",
  1421. " <td>[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...</td>\n",
  1422. " <td>2016</td>\n",
  1423. " </tr>\n",
  1424. " <tr>\n",
  1425. " <th>17124</th>\n",
  1426. " <td>Pirates of the Caribbean: On Stranger Tides</td>\n",
  1427. " <td>2011-05-14</td>\n",
  1428. " <td>380000000.0</td>\n",
  1429. " <td>1.045714e+09</td>\n",
  1430. " <td>136.0</td>\n",
  1431. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...</td>\n",
  1432. " <td>2011</td>\n",
  1433. " </tr>\n",
  1434. " <tr>\n",
  1435. " <th>38176</th>\n",
  1436. " <td>Finding Dory</td>\n",
  1437. " <td>2016-06-16</td>\n",
  1438. " <td>200000000.0</td>\n",
  1439. " <td>1.028571e+09</td>\n",
  1440. " <td>97.0</td>\n",
  1441. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...</td>\n",
  1442. " <td>2016</td>\n",
  1443. " </tr>\n",
  1444. " <tr>\n",
  1445. " <th>14892</th>\n",
  1446. " <td>Alice in Wonderland</td>\n",
  1447. " <td>2010-03-03</td>\n",
  1448. " <td>200000000.0</td>\n",
  1449. " <td>1.025491e+09</td>\n",
  1450. " <td>108.0</td>\n",
  1451. " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
  1452. " <td>2010</td>\n",
  1453. " </tr>\n",
  1454. " <tr>\n",
  1455. " <th>36253</th>\n",
  1456. " <td>Zootopia</td>\n",
  1457. " <td>2016-02-11</td>\n",
  1458. " <td>150000000.0</td>\n",
  1459. " <td>1.023784e+09</td>\n",
  1460. " <td>108.0</td>\n",
  1461. " <td>[{'id': 16, 'name': 'Animation'}, {'id': 12, '...</td>\n",
  1462. " <td>2016</td>\n",
  1463. " </tr>\n",
  1464. " <tr>\n",
  1465. " <th>19971</th>\n",
  1466. " <td>The Hobbit: An Unexpected Journey</td>\n",
  1467. " <td>2012-11-26</td>\n",
  1468. " <td>250000000.0</td>\n",
  1469. " <td>1.021104e+09</td>\n",
  1470. " <td>169.0</td>\n",
  1471. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
  1472. " <td>2012</td>\n",
  1473. " </tr>\n",
  1474. " <tr>\n",
  1475. " <th>44009</th>\n",
  1476. " <td>Despicable Me 3</td>\n",
  1477. " <td>2017-06-15</td>\n",
  1478. " <td>80000000.0</td>\n",
  1479. " <td>1.020063e+09</td>\n",
  1480. " <td>96.0</td>\n",
  1481. " <td>[{'id': 28, 'name': 'Action'}, {'id': 16, 'nam...</td>\n",
  1482. " <td>2017</td>\n",
  1483. " </tr>\n",
  1484. " <tr>\n",
  1485. " <th>12481</th>\n",
  1486. " <td>The Dark Knight</td>\n",
  1487. " <td>2008-07-16</td>\n",
  1488. " <td>185000000.0</td>\n",
  1489. " <td>1.004558e+09</td>\n",
  1490. " <td>152.0</td>\n",
  1491. " <td>[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...</td>\n",
  1492. " <td>2008</td>\n",
  1493. " </tr>\n",
  1494. " </tbody>\n",
  1495. "</table>\n",
  1496. "</div>"
  1497. ],
  1498. "text/plain": [
  1499. " title release_date \\\n",
  1500. "14551 Avatar 2009-12-10 \n",
  1501. "26555 Star Wars: The Force Awakens 2015-12-15 \n",
  1502. "1639 Titanic 1997-11-18 \n",
  1503. "17818 The Avengers 2012-04-25 \n",
  1504. "25084 Jurassic World 2015-06-09 \n",
  1505. "28830 Furious 7 2015-04-01 \n",
  1506. "26558 Avengers: Age of Ultron 2015-04-22 \n",
  1507. "17437 Harry Potter and the Deathly Hallows: Part 2 2011-07-07 \n",
  1508. "22110 Frozen 2013-11-27 \n",
  1509. "42222 Beauty and the Beast 2017-03-16 \n",
  1510. "43255 The Fate of the Furious 2017-04-12 \n",
  1511. "20830 Iron Man 3 2013-04-18 \n",
  1512. "30700 Minions 2015-06-17 \n",
  1513. "26567 Captain America: Civil War 2016-04-27 \n",
  1514. "17293 Transformers: Dark of the Moon 2011-06-28 \n",
  1515. "7000 The Lord of the Rings: The Return of the King 2003-12-01 \n",
  1516. "19261 Skyfall 2012-10-25 \n",
  1517. "23617 Transformers: Age of Extinction 2014-06-25 \n",
  1518. "18252 The Dark Knight Rises 2012-07-16 \n",
  1519. "15348 Toy Story 3 2010-06-16 \n",
  1520. "11008 Pirates of the Caribbean: Dead Man's Chest 2006-06-20 \n",
  1521. "41489 Rogue One: A Star Wars Story 2016-12-14 \n",
  1522. "17124 Pirates of the Caribbean: On Stranger Tides 2011-05-14 \n",
  1523. "38176 Finding Dory 2016-06-16 \n",
  1524. "14892 Alice in Wonderland 2010-03-03 \n",
  1525. "36253 Zootopia 2016-02-11 \n",
  1526. "19971 The Hobbit: An Unexpected Journey 2012-11-26 \n",
  1527. "44009 Despicable Me 3 2017-06-15 \n",
  1528. "12481 The Dark Knight 2008-07-16 \n",
  1529. "\n",
  1530. " budget revenue runtime \\\n",
  1531. "14551 237000000.0 2.787965e+09 162.0 \n",
  1532. "26555 245000000.0 2.068224e+09 136.0 \n",
  1533. "1639 200000000.0 1.845034e+09 194.0 \n",
  1534. "17818 220000000.0 1.519558e+09 143.0 \n",
  1535. "25084 150000000.0 1.513529e+09 124.0 \n",
  1536. "28830 190000000.0 1.506249e+09 137.0 \n",
  1537. "26558 280000000.0 1.405404e+09 141.0 \n",
  1538. "17437 125000000.0 1.342000e+09 130.0 \n",
  1539. "22110 150000000.0 1.274219e+09 102.0 \n",
  1540. "42222 160000000.0 1.262886e+09 129.0 \n",
  1541. "43255 250000000.0 1.238765e+09 136.0 \n",
  1542. "20830 200000000.0 1.215440e+09 130.0 \n",
  1543. "30700 74000000.0 1.156731e+09 91.0 \n",
  1544. "26567 250000000.0 1.153304e+09 147.0 \n",
  1545. "17293 195000000.0 1.123747e+09 154.0 \n",
  1546. "7000 94000000.0 1.118889e+09 201.0 \n",
  1547. "19261 200000000.0 1.108561e+09 143.0 \n",
  1548. "23617 210000000.0 1.091405e+09 165.0 \n",
  1549. "18252 250000000.0 1.084939e+09 165.0 \n",
  1550. "15348 200000000.0 1.066970e+09 103.0 \n",
  1551. "11008 200000000.0 1.065660e+09 151.0 \n",
  1552. "41489 200000000.0 1.056057e+09 133.0 \n",
  1553. "17124 380000000.0 1.045714e+09 136.0 \n",
  1554. "38176 200000000.0 1.028571e+09 97.0 \n",
  1555. "14892 200000000.0 1.025491e+09 108.0 \n",
  1556. "36253 150000000.0 1.023784e+09 108.0 \n",
  1557. "19971 250000000.0 1.021104e+09 169.0 \n",
  1558. "44009 80000000.0 1.020063e+09 96.0 \n",
  1559. "12481 185000000.0 1.004558e+09 152.0 \n",
  1560. "\n",
  1561. " genres year \n",
  1562. "14551 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2009 \n",
  1563. "26555 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
  1564. "1639 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... 1997 \n",
  1565. "17818 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2012 \n",
  1566. "25084 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
  1567. "28830 [{'id': 28, 'name': 'Action'}] 2015 \n",
  1568. "26558 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2015 \n",
  1569. "17437 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2011 \n",
  1570. "22110 [{'id': 16, 'name': 'Animation'}, {'id': 12, '... 2013 \n",
  1571. "42222 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2017 \n",
  1572. "43255 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... 2017 \n",
  1573. "20830 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2013 \n",
  1574. "30700 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... 2015 \n",
  1575. "26567 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... 2016 \n",
  1576. "17293 [{'id': 28, 'name': 'Action'}, {'id': 878, 'na... 2011 \n",
  1577. "7000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2003 \n",
  1578. "19261 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2012 \n",
  1579. "23617 [{'id': 878, 'name': 'Science Fiction'}, {'id'... 2014 \n",
  1580. "18252 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... 2012 \n",
  1581. "15348 [{'id': 16, 'name': 'Animation'}, {'id': 10751... 2010 \n",
  1582. "11008 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2006 \n",
  1583. "41489 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... 2016 \n",
  1584. "17124 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... 2011 \n",
  1585. "38176 [{'id': 12, 'name': 'Adventure'}, {'id': 16, '... 2016 \n",
  1586. "14892 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2010 \n",
  1587. "36253 [{'id': 16, 'name': 'Animation'}, {'id': 12, '... 2016 \n",
  1588. "19971 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2012 \n",
  1589. "44009 [{'id': 28, 'name': 'Action'}, {'id': 16, 'nam... 2017 \n",
  1590. "12481 [{'id': 18, 'name': 'Drama'}, {'id': 28, 'name... 2008 "
  1591. ]
  1592. },
  1593. "execution_count": 18,
  1594. "metadata": {},
  1595. "output_type": "execute_result"
  1596. }
  1597. ],
  1598. "source": [
  1599. "#Select only those movies which earned more than 1 billion\n",
  1600. "new = small_df[small_df['revenue'] > 1e9]\n",
  1601. "\n",
  1602. "new"
  1603. ]
  1604. },
  1605. {
  1606. "cell_type": "code",
  1607. "execution_count": 19,
  1608. "metadata": {},
  1609. "outputs": [
  1610. {
  1611. "data": {
  1612. "text/html": [
  1613. "<div>\n",
  1614. "<style scoped>\n",
  1615. " .dataframe tbody tr th:only-of-type {\n",
  1616. " vertical-align: middle;\n",
  1617. " }\n",
  1618. "\n",
  1619. " .dataframe tbody tr th {\n",
  1620. " vertical-align: top;\n",
  1621. " }\n",
  1622. "\n",
  1623. " .dataframe thead th {\n",
  1624. " text-align: right;\n",
  1625. " }\n",
  1626. "</style>\n",
  1627. "<table border=\"1\" class=\"dataframe\">\n",
  1628. " <thead>\n",
  1629. " <tr style=\"text-align: right;\">\n",
  1630. " <th></th>\n",
  1631. " <th>title</th>\n",
  1632. " <th>release_date</th>\n",
  1633. " <th>budget</th>\n",
  1634. " <th>revenue</th>\n",
  1635. " <th>runtime</th>\n",
  1636. " <th>genres</th>\n",
  1637. " <th>year</th>\n",
  1638. " </tr>\n",
  1639. " </thead>\n",
  1640. " <tbody>\n",
  1641. " <tr>\n",
  1642. " <th>17437</th>\n",
  1643. " <td>Harry Potter and the Deathly Hallows: Part 2</td>\n",
  1644. " <td>2011-07-07</td>\n",
  1645. " <td>125000000.0</td>\n",
  1646. " <td>1.342000e+09</td>\n",
  1647. " <td>130.0</td>\n",
  1648. " <td>[{'id': 10751, 'name': 'Family'}, {'id': 14, '...</td>\n",
  1649. " <td>2011</td>\n",
  1650. " </tr>\n",
  1651. " <tr>\n",
  1652. " <th>30700</th>\n",
  1653. " <td>Minions</td>\n",
  1654. " <td>2015-06-17</td>\n",
  1655. " <td>74000000.0</td>\n",
  1656. " <td>1.156731e+09</td>\n",
  1657. " <td>91.0</td>\n",
  1658. " <td>[{'id': 10751, 'name': 'Family'}, {'id': 16, '...</td>\n",
  1659. " <td>2015</td>\n",
  1660. " </tr>\n",
  1661. " <tr>\n",
  1662. " <th>7000</th>\n",
  1663. " <td>The Lord of the Rings: The Return of the King</td>\n",
  1664. " <td>2003-12-01</td>\n",
  1665. " <td>94000000.0</td>\n",
  1666. " <td>1.118889e+09</td>\n",
  1667. " <td>201.0</td>\n",
  1668. " <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
  1669. " <td>2003</td>\n",
  1670. " </tr>\n",
  1671. " <tr>\n",
  1672. " <th>44009</th>\n",
  1673. " <td>Despicable Me 3</td>\n",
  1674. " <td>2017-06-15</td>\n",
  1675. " <td>80000000.0</td>\n",
  1676. " <td>1.020063e+09</td>\n",
  1677. " <td>96.0</td>\n",
  1678. " <td>[{'id': 28, 'name': 'Action'}, {'id': 16, 'nam...</td>\n",
  1679. " <td>2017</td>\n",
  1680. " </tr>\n",
  1681. " </tbody>\n",
  1682. "</table>\n",
  1683. "</div>"
  1684. ],
  1685. "text/plain": [
  1686. " title release_date \\\n",
  1687. "17437 Harry Potter and the Deathly Hallows: Part 2 2011-07-07 \n",
  1688. "30700 Minions 2015-06-17 \n",
  1689. "7000 The Lord of the Rings: The Return of the King 2003-12-01 \n",
  1690. "44009 Despicable Me 3 2017-06-15 \n",
  1691. "\n",
  1692. " budget revenue runtime \\\n",
  1693. "17437 125000000.0 1.342000e+09 130.0 \n",
  1694. "30700 74000000.0 1.156731e+09 91.0 \n",
  1695. "7000 94000000.0 1.118889e+09 201.0 \n",
  1696. "44009 80000000.0 1.020063e+09 96.0 \n",
  1697. "\n",
  1698. " genres year \n",
  1699. "17437 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... 2011 \n",
  1700. "30700 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... 2015 \n",
  1701. "7000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 2003 \n",
  1702. "44009 [{'id': 28, 'name': 'Action'}, {'id': 16, 'nam... 2017 "
  1703. ]
  1704. },
  1705. "execution_count": 19,
  1706. "metadata": {},
  1707. "output_type": "execute_result"
  1708. }
  1709. ],
  1710. "source": [
  1711. "#Select only those movies which earned more than 1 billion and spent less than 150 million\n",
  1712. "\n",
  1713. "new2 = small_df[(small_df['revenue'] > 1e9) & (small_df['budget'] < 1.5e8)]\n",
  1714. "new2"
  1715. ]
  1716. },
  1717. {
  1718. "cell_type": "code",
  1719. "execution_count": 20,
  1720. "metadata": {},
  1721. "outputs": [
  1722. {
  1723. "data": {
  1724. "text/plain": [
  1725. "pandas.core.series.Series"
  1726. ]
  1727. },
  1728. "execution_count": 20,
  1729. "metadata": {},
  1730. "output_type": "execute_result"
  1731. }
  1732. ],
  1733. "source": [
  1734. "type(small_df['year'])"
  1735. ]
  1736. },
  1737. {
  1738. "cell_type": "code",
  1739. "execution_count": 21,
  1740. "metadata": {},
  1741. "outputs": [
  1742. {
  1743. "name": "stdout",
  1744. "output_type": "stream",
  1745. "text": [
  1746. "1256.0\n",
  1747. "0.0\n"
  1748. ]
  1749. }
  1750. ],
  1751. "source": [
  1752. "\n",
  1753. "#Get the runtime Series object\n",
  1754. "runtime = small_df['runtime']\n",
  1755. "\n",
  1756. "#Print the longest runtime of any movie\n",
  1757. "print(runtime.max())\n",
  1758. "\n",
  1759. "#Print the shortest runtime of any movie\n",
  1760. "print(runtime.min())"
  1761. ]
  1762. },
  1763. {
  1764. "cell_type": "code",
  1765. "execution_count": 22,
  1766. "metadata": {},
  1767. "outputs": [
  1768. {
  1769. "name": "stdout",
  1770. "output_type": "stream",
  1771. "text": [
  1772. "4224578.813474693\n",
  1773. "0.0\n"
  1774. ]
  1775. }
  1776. ],
  1777. "source": [
  1778. "#Get the budget Series object\n",
  1779. "budget = small_df['budget']\n",
  1780. "\n",
  1781. "#Print the mean budget of the movies\n",
  1782. "print(budget.mean())\n",
  1783. "\n",
  1784. "#Print the median budget of the movies\n",
  1785. "print(budget.median())"
  1786. ]
  1787. },
  1788. {
  1789. "cell_type": "code",
  1790. "execution_count": 23,
  1791. "metadata": {},
  1792. "outputs": [
  1793. {
  1794. "data": {
  1795. "text/plain": [
  1796. "8267610.399999982"
  1797. ]
  1798. },
  1799. "execution_count": 23,
  1800. "metadata": {},
  1801. "output_type": "execute_result"
  1802. }
  1803. ],
  1804. "source": [
  1805. "#Get the revenue Series object\n",
  1806. "revenue = small_df['revenue']\n",
  1807. "\n",
  1808. "#Revenue generated by the 90th percentile movie\n",
  1809. "revenue.quantile(0.90)"
  1810. ]
  1811. },
  1812. {
  1813. "cell_type": "code",
  1814. "execution_count": 24,
  1815. "metadata": {},
  1816. "outputs": [
  1817. {
  1818. "data": {
  1819. "text/plain": [
  1820. "2014 1974\n",
  1821. "2015 1905\n",
  1822. "2013 1889\n",
  1823. "2012 1722\n",
  1824. "2011 1667\n",
  1825. " ... \n",
  1826. "1887 1\n",
  1827. "1883 1\n",
  1828. "1893 1\n",
  1829. "2020 1\n",
  1830. "1878 1\n",
  1831. "Name: year, Length: 136, dtype: int64"
  1832. ]
  1833. },
  1834. "execution_count": 24,
  1835. "metadata": {},
  1836. "output_type": "execute_result"
  1837. }
  1838. ],
  1839. "source": [
  1840. "#Get number of movies released each year\n",
  1841. "small_df['year'].value_counts()"
  1842. ]
  1843. },
  1844. {
  1845. "cell_type": "code",
  1846. "execution_count": null,
  1847. "metadata": {},
  1848. "outputs": [],
  1849. "source": []
  1850. }
  1851. ],
  1852. "metadata": {
  1853. "kernelspec": {
  1854. "display_name": "Python 3 (ipykernel)",
  1855. "language": "python",
  1856. "name": "python3"
  1857. },
  1858. "language_info": {
  1859. "codemirror_mode": {
  1860. "name": "ipython",
  1861. "version": 3
  1862. },
  1863. "file_extension": ".py",
  1864. "mimetype": "text/x-python",
  1865. "name": "python",
  1866. "nbconvert_exporter": "python",
  1867. "pygments_lexer": "ipython3",
  1868. "version": "3.10.4"
  1869. }
  1870. },
  1871. "nbformat": 4,
  1872. "nbformat_minor": 2
  1873. }