diff --git "a/Data Analitics/Week 4/.ipynb_checkpoints/TU257-Lab3-2-Data-Transformations-checkpoint.ipynb" "b/Data Analitics/Week 4/.ipynb_checkpoints/TU257-Lab3-2-Data-Transformations-checkpoint.ipynb" new file mode 100644--- /dev/null +++ "b/Data Analitics/Week 4/.ipynb_checkpoints/TU257-Lab3-2-Data-Transformations-checkpoint.ipynb" @@ -0,0 +1,4249 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lab3 - 2 - Data Transformations" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import display, HTML\n", + "display(HTML(\"\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When preparing data for input to machine learning algorithms you may have to perform certain types of data preparation.\n", + "\n", + "In most enterprise solutions all or most of these tasks are automated for you, but in many languages they aren’t. The enterprise solutions are about ‘automating the boring stuff’ so that you don’t have to worry about it and waste valuable time doing boring, repetitive things.\n", + "\n", + "The following examples illustrate a number of ways to recode categorical variables into numeric values. There are a number of approaches available, and it is up to you to decide which one might work best for your problem, your data, etc.\n", + "\n", + "Let’s begin by loading the data set to be used in these examples. It is a Video Games reviews data set.\n", + "Useful: This website has a list of useful and typical data transformations. Some of these are covered below.\n", + "https://www.kdnuggets.com/2020/07/easy-guide-data-preprocessing-python.html" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformYear_of_ReleaseGenrePublisherNA_SalesEU_SalesJP_SalesOther_SalesGlobal_SalesCritic_ScoreCritic_CountUser_ScoreUser_CountDeveloperRating
0Wii SportsWii2006.0SportsNintendo41.3628.963.778.4582.5376.051.08322.0NintendoE
1Super Mario Bros.NES1985.0PlatformNintendo29.083.586.810.7740.24NaNNaNNaNNaNNaNNaN
2Mario Kart WiiWii2008.0RacingNintendo15.6812.763.793.2935.5282.073.08.3709.0NintendoE
3Wii Sports ResortWii2009.0SportsNintendo15.6110.933.282.9532.7780.073.08192.0NintendoE
4Pokemon Red/Pokemon BlueGB1996.0Role-PlayingNintendo11.278.8910.221.0031.37NaNNaNNaNNaNNaNNaN
5TetrisGB1989.0PuzzleNintendo23.202.264.220.5830.26NaNNaNNaNNaNNaNNaN
6New Super Mario Bros.DS2006.0PlatformNintendo11.289.146.502.8829.8089.065.08.5431.0NintendoE
7Wii PlayWii2006.0MiscNintendo13.969.182.932.8428.9258.041.06.6129.0NintendoE
8New Super Mario Bros. WiiWii2009.0PlatformNintendo14.446.944.702.2428.3287.080.08.4594.0NintendoE
9Duck HuntNES1984.0ShooterNintendo26.930.630.280.4728.31NaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Name Platform Year_of_Release Genre \\\n", + "0 Wii Sports Wii 2006.0 Sports \n", + "1 Super Mario Bros. NES 1985.0 Platform \n", + "2 Mario Kart Wii Wii 2008.0 Racing \n", + "3 Wii Sports Resort Wii 2009.0 Sports \n", + "4 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing \n", + "5 Tetris GB 1989.0 Puzzle \n", + "6 New Super Mario Bros. DS 2006.0 Platform \n", + "7 Wii Play Wii 2006.0 Misc \n", + "8 New Super Mario Bros. Wii Wii 2009.0 Platform \n", + "9 Duck Hunt NES 1984.0 Shooter \n", + "\n", + " Publisher NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales \\\n", + "0 Nintendo 41.36 28.96 3.77 8.45 82.53 \n", + "1 Nintendo 29.08 3.58 6.81 0.77 40.24 \n", + "2 Nintendo 15.68 12.76 3.79 3.29 35.52 \n", + "3 Nintendo 15.61 10.93 3.28 2.95 32.77 \n", + "4 Nintendo 11.27 8.89 10.22 1.00 31.37 \n", + "5 Nintendo 23.20 2.26 4.22 0.58 30.26 \n", + "6 Nintendo 11.28 9.14 6.50 2.88 29.80 \n", + "7 Nintendo 13.96 9.18 2.93 2.84 28.92 \n", + "8 Nintendo 14.44 6.94 4.70 2.24 28.32 \n", + "9 Nintendo 26.93 0.63 0.28 0.47 28.31 \n", + "\n", + " Critic_Score Critic_Count User_Score User_Count Developer Rating \n", + "0 76.0 51.0 8 322.0 Nintendo E \n", + "1 NaN NaN NaN NaN NaN NaN \n", + "2 82.0 73.0 8.3 709.0 Nintendo E \n", + "3 80.0 73.0 8 192.0 Nintendo E \n", + "4 NaN NaN NaN NaN NaN NaN \n", + "5 NaN NaN NaN NaN NaN NaN \n", + "6 89.0 65.0 8.5 431.0 Nintendo E \n", + "7 58.0 41.0 6.6 129.0 Nintendo E \n", + "8 87.0 80.0 8.4 594.0 Nintendo E \n", + "9 NaN NaN NaN NaN NaN NaN " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# perform some Statistics on the items in a panda\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib as plt\n", + "videoReview = pd.read_csv('/Users/brendan.tierney/Dropbox/4-Datasets/Video_Games_Sales_as_at_22_Dec_2016.csv') \n", + "videoReview.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "Name object\n", + "Platform object\n", + "Year_of_Release float64\n", + "Genre object\n", + "Publisher object\n", + "NA_Sales float64\n", + "EU_Sales float64\n", + "JP_Sales float64\n", + "Other_Sales float64\n", + "Global_Sales float64\n", + "Critic_Score float64\n", + "Critic_Count float64\n", + "User_Score object\n", + "User_Count float64\n", + "Developer object\n", + "Rating object\n", + "dtype: object" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#What are the data types of each variable\n", + "videoReview.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Nintendo',\n", + " nan,\n", + " 'Good Science Studio',\n", + " 'Rockstar North',\n", + " 'Polyphony Digital',\n", + " 'Infinity Ward, Sledgehammer Games',\n", + " 'Treyarch',\n", + " 'Infinity Ward',\n", + " 'DMA Design',\n", + " 'Game Arts',\n", + " 'Retro Studios, Entertainment Analysis & Development Division',\n", + " 'Bungie Software, Bungie',\n", + " 'Ubisoft',\n", + " 'Bungie',\n", + " 'SquareSoft',\n", + " '343 Industries',\n", + " 'Hudson Soft',\n", + " 'Bethesda Game Studios',\n", + " 'EA Sports',\n", + " 'Bungie Software',\n", + " 'Nd Cube',\n", + " 'Hudson',\n", + " 'Electronic Arts',\n", + " 'The Sims Studio',\n", + " 'Game Freak',\n", + " 'Rockstar Leeds',\n", + " 'Sledgehammer Games',\n", + " 'EA Sports, EA Vancouver',\n", + " 'Nintendo EAD Tokyo',\n", + " 'EA DICE',\n", + " 'EA Black Box',\n", + " 'Ubisoft Paris',\n", + " 'Namco',\n", + " 'Naughty Dog',\n", + " 'HAL Labs',\n", + " 'EA Canada',\n", + " 'EA LA',\n", + " 'Epic Games',\n", + " 'Pipeworks Software, Inc.',\n", + " 'Rockstar San Diego',\n", + " 'Retro Studios',\n", + " 'Blizzard Entertainment',\n", + " 'Reflections Interactive',\n", + " 'Rare Ltd.',\n", + " 'KCEJ',\n", + " 'Kojima Productions',\n", + " 'Square Enix',\n", + " 
'SCE/WWS, Media Molecule',\n", + " 'Capcom',\n", + " 'Level 5',\n", + " \"Traveller's Tales\",\n", + " 'Taito Corporation',\n", + " 'Ubisoft Montreal',\n", + " 'Turn 10',\n", + " 'Rocksteady Studios',\n", + " 'Intelligent Systems',\n", + " 'Core Design Ltd.',\n", + " 'EA Tiburon',\n", + " 'Harmonix Music Systems',\n", + " 'Lionhead Studios',\n", + " 'Neversoft Entertainment',\n", + " 'Neversoft Entertainment, BudCat',\n", + " 'Naughty Dog, SCE/WWS',\n", + " 'SCE Santa Monica',\n", + " 'TT Fusion',\n", + " 'Stormfront Studios',\n", + " 'Bluepoint Games',\n", + " 'Vicarious Visions',\n", + " 'Next Level Games',\n", + " 'Konami',\n", + " 'Bethesda Softworks',\n", + " 'Ubisoft Paris, Ubisoft Montpellier',\n", + " 'High Moon Studios',\n", + " 'Insomniac Games',\n", + " 'Mass Media',\n", + " 'Ready at Dawn',\n", + " 'SCEE, Zoe Mode',\n", + " 'Valve Software',\n", + " 'Obsidian Entertainment',\n", + " 'Cat Daddy Games',\n", + " 'Digital Eclipse',\n", + " 'CD Projekt Red Studio',\n", + " 'Ubisoft, Ubisoft Montreal',\n", + " 'Visual Concepts',\n", + " 'Evolution Studios',\n", + " 'KCET',\n", + " 'SCEA, Zindagi Games',\n", + " 'Nintendo, Namco Bandai Games',\n", + " 'Massive Entertainment',\n", + " 'Maxis',\n", + " 'Argonaut Games',\n", + " 'TOSE',\n", + " 'Zipper Interactive',\n", + " 'Pandemic Studios',\n", + " 'Fox Interactive',\n", + " 'Office Create',\n", + " 'Team Soho',\n", + " 'EA Redwood Shores',\n", + " 'Certain Affinity',\n", + " 'Agenda',\n", + " 'Gearbox Software',\n", + " 'Kojima Productions, Moby Dick Studio',\n", + " 'Media Molecule',\n", + " 'Eurocom Entertainment Software',\n", + " 'Ubisoft Quebec',\n", + " 'Double Helix Games',\n", + " 'Rockstar Toronto',\n", + " 'EA Games',\n", + " 'Splash Damage, The Coalition',\n", + " 'Luxoflux, Inc.',\n", + " 'Shiny Entertainment',\n", + " 'Team Bondi',\n", + " '1st Playable Productions',\n", + " 'Dimps Corporation, Sonic Team',\n", + " 'Indies Zero',\n", + " '4J Studios',\n", + " 'BioWare',\n", + " 'Dimps 
Corporation',\n", + " 'Quantic Dream',\n", + " 'Volition Inc.',\n", + " 'Guerrilla',\n", + " 'Junction Point',\n", + " 'Spike, Namco Bandai Games',\n", + " 'LucasArts, BioWare',\n", + " 'Monolith Productions',\n", + " 'Sonic Team',\n", + " 'Io Interactive',\n", + " 'Respawn Entertainment',\n", + " 'Sucker Punch',\n", + " 'Sumo Digital',\n", + " 'Clap Hanz',\n", + " 'OfficeCreate',\n", + " 'Spark Unlimited',\n", + " 'NetherRealm Studios',\n", + " 'Irrational Games, 2K Marin',\n", + " 'Majesco Games, Majesco',\n", + " 'EA DICE, Danger Close',\n", + " 'Virtual Toys',\n", + " 'NuFX',\n", + " 'Criterion Games',\n", + " 'Toys for Bob',\n", + " 'Warner Bros. Interactive Entertainment, NetherRealm Studios',\n", + " 'Edge of Reality',\n", + " 'LucasArts',\n", + " 'Artoon',\n", + " 'Namco Bandai Games',\n", + " 'Dreamworks Interactive',\n", + " 'Big Blue Box',\n", + " 'DC Studios',\n", + " 'Ubisoft Shanghai',\n", + " 'Slightly Mad Studios',\n", + " 'Team Fusion',\n", + " 'Angel Studios',\n", + " 'Ensemble Studios',\n", + " 'Dice, Danger Close',\n", + " 'syn Sophia',\n", + " \"Yuke's\",\n", + " 'Genius Sonority Inc.',\n", + " 'Acclaim Studios Cheltenham',\n", + " 'Headstrong Games',\n", + " 'CAProduction',\n", + " 'Raven Software',\n", + " 'Midway',\n", + " 'From Software',\n", + " 'Black Rock Studio',\n", + " 'Cyan Worlds',\n", + " 'THQ',\n", + " 'Rainbow Studios',\n", + " 'Black Box',\n", + " 'Bullfrog Productions',\n", + " 'Konami Computer Entertainment Hawaii',\n", + " 'Techland',\n", + " 'Heavy Iron Studios',\n", + " 'Frontier Developments',\n", + " 'GREZZO',\n", + " 'Rare Ltd., BigPark',\n", + " 'ArenaNet',\n", + " 'WB Games Montreal',\n", + " 'HAL Labs, Creatures Inc.',\n", + " 'Backbone Entertainment',\n", + " 'Sega',\n", + " 'Exakt',\n", + " 'TT Games',\n", + " '2K Marin',\n", + " '5TH Cell',\n", + " 'Beenox',\n", + " 'Amaze Entertainment',\n", + " 'Bizarre Creations',\n", + " 'Omega Force',\n", + " 'Visceral Games',\n", + " 'Blue Planet Software',\n", + " 'Matrix 
Software',\n", + " 'Full Fat',\n", + " 'Helixe',\n", + " 'EA Vancouver',\n", + " 'Ubisoft Montpellier',\n", + " 'ChunSoft',\n", + " 'Midway Studios - San Diego',\n", + " 'Acquire',\n", + " 'h.a.n.d. Inc.',\n", + " 'Ghost Games',\n", + " 'Check Six Games',\n", + " 'PLAYGROUND, Playground Games',\n", + " 'Aki Corp.',\n", + " 'Flagship, HAL Labs',\n", + " 'Jellyvision',\n", + " 'Art',\n", + " 'Irrational Games',\n", + " 'Nintendo, Nd Cube',\n", + " 'NanaOn-Sha',\n", + " 'Factor 5',\n", + " 'Papaya Studios',\n", + " 'SCEI',\n", + " 'Krome Studios',\n", + " 'Avalanche Studios',\n", + " 'Jupiter Corporation',\n", + " 'Rockstar Studios',\n", + " 'Altron',\n", + " 'Creative Assembly',\n", + " 'Sega AM2',\n", + " 'Hangar 13',\n", + " 'BudCat',\n", + " 'Tango Gameworks',\n", + " 'SCEE London Studio',\n", + " 'Genuine Games',\n", + " 'Team Ninja',\n", + " 'Camelot Software Planning',\n", + " 'Red Storm Entertainment',\n", + " 'VIS Entertainment',\n", + " 'Realtime Worlds',\n", + " 'Monolith Soft',\n", + " 'Terminal Reality',\n", + " 'Ready at Dawn, SCE Santa Monica',\n", + " 'SCE Japan Studio',\n", + " 'ArtePiazza',\n", + " 'Ubisoft Reflections, Ivory Tower',\n", + " 'Tri-Ace',\n", + " 'Arkane Studios',\n", + " '3d6 Games',\n", + " 'Magic Pockets',\n", + " 'Artefacts Studio',\n", + " 'Hello Games',\n", + " 'Activision',\n", + " 'Griptonite Games',\n", + " 'Webfoot Technologies',\n", + " 'Farsight Studios',\n", + " 'Firaxis Games',\n", + " 'Bandai Namco Games',\n", + " 'Rebellion',\n", + " 'Shaba Games',\n", + " 'Artificial Mind and Movement',\n", + " 'Paradigm Entertainment',\n", + " 'EA Chicago',\n", + " 'SuperMassive Games',\n", + " 'HB Studios Multimedia',\n", + " 'EA Sports Big',\n", + " 'Spike',\n", + " 'Ninja Theory',\n", + " 'EA Montreal',\n", + " 'id Software',\n", + " 'Avalanche Software',\n", + " 'Capcom Vancouver',\n", + " 'Pivotal Games',\n", + " 'MachineGames',\n", + " '2K Czech',\n", + " 'Mind Candy',\n", + " 'Naughty Dog, Sony Bend',\n", + " 'Cavia Inc.',\n", + 
" 'The Code Monkeys',\n", + " 'Amusement Vision',\n", + " 'Genius Sonority Inc., Disney Interactive Studios',\n", + " 'Sports Interactive',\n", + " 'Genki',\n", + " 'Blue Castle Games',\n", + " 'Barking Lizards',\n", + " 'Zoe Mode',\n", + " 'Kush Games',\n", + " 'Nihilistic',\n", + " 'WayForward',\n", + " 'Kaos Studios',\n", + " 'Hellbent Games',\n", + " 'Blitz Games',\n", + " 'BigPark',\n", + " 'Planet Moon Studios',\n", + " 'Blue Byte, Related Designs',\n", + " 'EA Bright Light',\n", + " 'Digital Extremes, 2K Marin',\n", + " 'Sand Grain Studios',\n", + " 'Flagship',\n", + " 'PlatinumGames',\n", + " 'Danger Close',\n", + " 'Barnstorm Games',\n", + " 'Incognito Inc.',\n", + " 'Exient Entertainment',\n", + " 'Codemasters',\n", + " 'Playground Games',\n", + " 'Arzest',\n", + " 'Crytek',\n", + " 'Crystal Dynamics',\n", + " 'Sega, Sonic Team',\n", + " 'Z-Axis, Ltd.',\n", + " 'Now Production',\n", + " 'Eidos Montreal',\n", + " 'Square Enix, Think and Feel',\n", + " 'The Coalition',\n", + " 'Pitbull Syndicate',\n", + " 'Codemasters Birmingham',\n", + " 'Rockstar Vienna',\n", + " 'Remedy Entertainment',\n", + " 'Steel Monkeys',\n", + " 'Acclaim',\n", + " 'Radical Entertainment',\n", + " 'Good-Feel',\n", + " 'SCEA',\n", + " 'Namco Bandai Games America, Namco Bandai Games',\n", + " 'Creatures Inc.',\n", + " 'Pocket Studios',\n", + " 'M2',\n", + " 'Melbourne House',\n", + " 'Ubisoft Sofia',\n", + " 'ZeniMax Media',\n", + " 'Snowblind Studios',\n", + " 'Lexis Numerique, Virtual Toys',\n", + " 'Opus, Project Sora',\n", + " 'Nintendo, Spike Chunsoft',\n", + " 'CyberConnect2',\n", + " '3G Studios',\n", + " 'Tamsoft',\n", + " \"THQ, Yuke's\",\n", + " 'Virtuos',\n", + " 'Nixxes Software',\n", + " 'Slant Six',\n", + " 'Tarsier Studios, Double Eleven',\n", + " 'Spike Chunsoft Co. 
Ltd., Spike Chunsoft',\n", + " 'Chris Sawyer',\n", + " '49Games',\n", + " 'Oddworld Inhabitants',\n", + " 'High Voltage Software',\n", + " 'neo Software',\n", + " 'Savage Entertainment',\n", + " 'Nex Entertainment',\n", + " 'Surreal Software',\n", + " 'Climax Group',\n", + " 'Locomotive Games',\n", + " 'Nintendo, Nintendo Software Technology',\n", + " 'Omega Force, Tecmo Koei Games',\n", + " 'Light Weight',\n", + " 'City Interactive',\n", + " 'Vingt-et-un Systems',\n", + " 'K2 LLC',\n", + " 'Ambrella',\n", + " 'Day 1 Studios',\n", + " 'Level 5, Brownie Brown',\n", + " 'United Front Games',\n", + " 'Ryu ga Gotoku Studios',\n", + " 'Atlus',\n", + " 'Ruffian Games',\n", + " 'Mercury Steam',\n", + " 'MTO',\n", + " 'EA Redwood Shores, EA Salt Lake',\n", + " 'Sony Online Entertainment',\n", + " 'Big Sky Software',\n", + " 'Visual Impact',\n", + " 'Ubisoft Shanghai/Paris',\n", + " 'PopCap',\n", + " 'Electronic Arts, Rebellion',\n", + " 'SCEE',\n", + " 'Page 44 Studios',\n", + " 'FreeStyleGames',\n", + " 'Mad Doc Software, Rockstar Vancouver',\n", + " 'Blizzard North',\n", + " 'Arika',\n", + " 'Barnhouse Effect',\n", + " 'Vigil Games',\n", + " 'n-Space',\n", + " 'Black Lantern Studios',\n", + " 'Microsoft Game Studios',\n", + " 'Sega AM3',\n", + " 'Tecmo',\n", + " '8ing/Raizing',\n", + " 'Electronic Arts, EA Redwood Shores',\n", + " 'Splash Damage',\n", + " 'Studio Liverpool',\n", + " 'Red Storm Entertainment, Ubisoft Paris',\n", + " 'Tantalus Interactive, Tantatus, Nintendo',\n", + " 'Crystal Dynamics, Nixxes Software',\n", + " 'Avanquest Software',\n", + " 'Bigbig Studios',\n", + " 'EA Salt Lake',\n", + " 'Longtail Studios',\n", + " 'Powerhead Games',\n", + " 'Epic Games, People Can Fly',\n", + " 'Sega, Dimps Corporation',\n", + " 'Koei',\n", + " 'Hyde',\n", + " 'Project Aces',\n", + " 'Team Ninja, Tecmo',\n", + " 'Razorworks Studios',\n", + " '989 Sports',\n", + " 'UBlart Montpellier',\n", + " 'Free Radical Design',\n", + " 'DreamFactory',\n", + " 'Incinerator 
Games',\n", + " 'Route 1 Games',\n", + " 'Rockstar London',\n", + " 'Ubisoft Toronto',\n", + " 'Double Fine Productions',\n", + " 'Turtle Rock Studios',\n", + " 'Giants Software',\n", + " 'Vicarious Visions, Neversoft Entertainment',\n", + " 'Blue Tongue Entertainment',\n", + " 'Overworks',\n", + " 'Monster Games Inc.',\n", + " 'Atari',\n", + " 'Coresoft',\n", + " 'SuperBot Entertainment',\n", + " 'Ubisoft Reflections',\n", + " 'Monkey Bar Games',\n", + " 'Alfa System',\n", + " 'Buzz Monkey',\n", + " 'Rockstar Vancouver',\n", + " 'SCEA San Diego Studios',\n", + " 'Neko Entertainment',\n", + " 'Square Enix, h.a.n.d. Inc.',\n", + " 'UDS',\n", + " 'KCE Studios',\n", + " 'Titus Software',\n", + " 'Black Ops Entertainment',\n", + " 'ImaginEngine',\n", + " 'Tantalus Interactive',\n", + " 'Mistwalker, Artoon',\n", + " 'Marvelous AQL',\n", + " 'Xpec',\n", + " 'Mistwalker',\n", + " 'Blitz Games Studios',\n", + " 'KCEK',\n", + " 'Sony Bend',\n", + " 'RED Entertainment',\n", + " 'FASA Studio',\n", + " 'Runecraft',\n", + " 'Big Huge Games',\n", + " 'Media Molecule, United Front Games',\n", + " 'SCi',\n", + " 'Cooking Mama Limited',\n", + " 'Disney Interactive Studios',\n", + " 'Red Fly Studio',\n", + " 'Cygames',\n", + " 'Rage Software',\n", + " 'Toolbox Design',\n", + " 'Team 17',\n", + " 'Dimps Corporation, Namco Bandai Games',\n", + " 'Land Ho!',\n", + " 'Asobo Studio',\n", + " 'Eutechnyx',\n", + " 'Gearbox Software, 3D Realms',\n", + " 'Digital Anvil',\n", + " 'Treyarch, Sledgehammer Games',\n", + " 'Marvelous Entertainment',\n", + " 'EA Seattle',\n", + " 'Big Blue Bubble Inc.',\n", + " 'Indie Built',\n", + " 'Ludia Inc.',\n", + " 'Evolution Games',\n", + " 'Atlus Co.',\n", + " 'Capcom, Pipeworks Software, Inc.',\n", + " 'Gevo Entertainment',\n", + " 'Gorilla Systems',\n", + " 'Guerilla Cambridge',\n", + " 'Marvelous',\n", + " 'Gorilla Games',\n", + " 'Deibus Studios',\n", + " 'Square Enix, Tri-Ace',\n", + " 'Novarama',\n", + " 'Sumo Digital, Playground Games',\n", + " 
'Trilobyte',\n", + " 'Silicon Knights',\n", + " 'Digital Illusions',\n", + " 'Kush Games, Visual Concepts',\n", + " 'iNiS',\n", + " 'EA Canada, Nihilistic',\n", + " 'Eden Studios',\n", + " 'David A. Palmer Productions',\n", + " 'Artificial Mind and Movement, Polygon Magic',\n", + " 'Acclaim Studios Austin',\n", + " 'Tetris Online, Inc',\n", + " 'Dancing Dots',\n", + " 'Piranha Games',\n", + " 'LucasArts, Red Fly Studio',\n", + " 'Game Republic',\n", + " 'Project Soul',\n", + " 'Kouyousha',\n", + " 'Propaganda Games',\n", + " 'JGI Entertainment',\n", + " 'Digital Extremes',\n", + " 'Brownie Brown',\n", + " 'Natsume',\n", + " 'Arc System Works',\n", + " 'Takara',\n", + " 'CyberConnect2, Racjin',\n", + " 'Dearsoft',\n", + " 'Paradox Development',\n", + " '2K Australia',\n", + " 'Realism Ltd',\n", + " 'Bandai',\n", + " 'Human Soft',\n", + " 'Jester Interactive',\n", + " 'Equinoxe',\n", + " 'Pacific Coast Power & Light',\n", + " 'Koei, Omega Force',\n", + " 'id Software, Nerve Software',\n", + " 'Eighting, Genius Sonority Inc.',\n", + " 'Reality Pump',\n", + " 'Virtucraft',\n", + " 'Illusion Softworks',\n", + " 'SCEA, Sims',\n", + " 'Kalypso',\n", + " 'Midway Studios - Los Angeles',\n", + " 'Ubisoft Milan',\n", + " 'Fuse Games Limited',\n", + " 'Pyramid',\n", + " 'Hypnos Entertainment',\n", + " 'Black Isle Studios',\n", + " 'Gaijin Entertainment',\n", + " 'Eden',\n", + " 'Blue Fang Games',\n", + " 'Sanzaru Games',\n", + " 'Tomy Corporation',\n", + " 'Secret Level',\n", + " 'Nerve Software',\n", + " 'Trion Worlds',\n", + " 'BioWare, Demiurge Studios',\n", + " 'Engine Software',\n", + " 'RED Entertainment, Artdink',\n", + " 'Cunning Developments',\n", + " 'Starbreeze',\n", + " 'Delphine Software International',\n", + " 'Gameloft',\n", + " 'Grasshopper Manufacture',\n", + " 'Shift, Bandai Namco Games',\n", + " 'Dimps Corporation, SCE Japan Studio',\n", + " 'Krome Studios, Screenlife Games',\n", + " 'Suzak',\n", + " 'Iron Galaxy Studios',\n", + " 'GenePool',\n", + " 
'Ubisoft Casablanca',\n", + " 'Epicenter Studios',\n", + " 'KCEA',\n", + " 'Adrenium',\n", + " 'Juice Games',\n", + " 'id Software, Raven Software',\n", + " 'SEGA Racing Studio',\n", + " 'Clover Studio',\n", + " 'SuperVillain Studios',\n", + " 'EA Sports, EA Canada',\n", + " 'MagicWand',\n", + " 'Media Vision, Contrail',\n", + " 'Paon Corporation',\n", + " 'Lab Rats Games',\n", + " 'Vicious Cycle',\n", + " 'Cinegroupe',\n", + " 'High Impact Games',\n", + " 'Skyworks Technologies',\n", + " 'Robomodo',\n", + " 'Ascaron Entertainment GmbH, Ascaron Entertainment',\n", + " '2K Sports',\n", + " 'FUN Labs',\n", + " 'Appaloosa Interactive',\n", + " 'Vanillaware',\n", + " 'Endemol',\n", + " 'Dreamworks Games',\n", + " 'Atomic Planet Entertainment',\n", + " 'Coldwood Interactive',\n", + " 'Valuewave Co.,Ltd.',\n", + " 'Pam Development',\n", + " 'Ubisoft Romania',\n", + " 'Spicy Horse',\n", + " 'Overkill Software',\n", + " 'Midway Studios - Austin',\n", + " 'Bunkasha Publishing',\n", + " 'Nippon Ichi Software',\n", + " 'Neverland',\n", + " 'Silicon Studio',\n", + " 'Namco Bandai Games, Bandai Namco Games',\n", + " 'Vanilla Ware',\n", + " 'Full-Fat',\n", + " 'Etranges Libellules',\n", + " 'Relic',\n", + " 'Collision Studios',\n", + " 'Anchor',\n", + " 'Racjin',\n", + " 'Magenta Software',\n", + " 'Awesome Studios',\n", + " 'Behaviour Interactive',\n", + " 'Pipe Dream Interactive',\n", + " 'Bullets',\n", + " 'Aspect',\n", + " 'Deep Space',\n", + " 'Inti',\n", + " 'Bam Entertainment',\n", + " 'THQ Digital Studio Phoenix',\n", + " 'Brain Toys',\n", + " 'Hothouse Creations',\n", + " 'Nuevo Retro Games',\n", + " 'Digital Kids',\n", + " 'Yager',\n", + " 'Omega Force, Koei Tecmo Games',\n", + " '4A Games',\n", + " 'Cing',\n", + " 'WXP',\n", + " 'Cerasus Media',\n", + " 'Inevitable Entertainment',\n", + " 'Destineer',\n", + " 'Eighting',\n", + " 'SCEA Sports Studio',\n", + " 'Keen Games',\n", + " 'Storm City Games',\n", + " 'ValuSoft',\n", + " 'High Voltage Software, Ubisoft 
Paris',\n", + " 'GRIN',\n", + " 'Q-Games',\n", + " 'Raster',\n", + " 'Vivarium',\n", + " 'VU Games',\n", + " 'Machatin, Inc.',\n", + " 'Point of View',\n", + " 'City Interactive, Deck 13',\n", + " 'T&E Soft',\n", + " 'Climax Group, Climax Studios',\n", + " 'HexaDrive',\n", + " 'Jupiter Corporation, Jupiter Multimedia',\n", + " 'Blade Interactive',\n", + " 'Factor 5, SCE/WWS',\n", + " 'SCE Japan Studio, comcept',\n", + " 'WBIE',\n", + " 'Warthog',\n", + " 'AWE Games',\n", + " 'Contraband Entertainment',\n", + " 'Eugen Systems',\n", + " 'Quest',\n", + " 'Kuusou Kagaku',\n", + " 'HandMade Game',\n", + " '7 Studios',\n", + " 'Blue Tongue',\n", + " 'SCS Software',\n", + " 'Humongous Entertainment',\n", + " 'Pocketeers',\n", + " 'Harmonix Music Systems, Backbone Entertainment',\n", + " 'Scholastic, Inc.',\n", + " 'Screenlife Games, WXP',\n", + " 'Totally Games',\n", + " 'Ratbag',\n", + " 'SCEJ',\n", + " 'Razorworks',\n", + " 'New',\n", + " 'Beyond Games',\n", + " 'Teyon',\n", + " 'Ubisoft Barcelona, Ubisoft Reflections',\n", + " 'Eurocom Entertainment Software, Visceral Games',\n", + " 'Nintendo, Headstrong Games',\n", + " \"Shin'en\",\n", + " 'Creations',\n", + " 'Crave',\n", + " 'Vatra',\n", + " 'Treasure',\n", + " 'Human Soft, Ivolgamus',\n", + " 'Eighting/Raizing',\n", + " '505 Games',\n", + " 'Studio Gigante',\n", + " 'Wideload Games Inc.',\n", + " 'Farsight Studios, Crave',\n", + " 'Computer Artworks',\n", + " 'Saffire',\n", + " 'Nintendo, Camelot Software Planning',\n", + " 'Team 17, Two Tribes',\n", + " 'Square Enix, TOSE',\n", + " 'Kuju Entertainment',\n", + " 'Humagade',\n", + " 'Q Entertainment',\n", + " 'Tri-Crescendo',\n", + " 'Tenyo',\n", + " 'LucasArts, Krome Studios',\n", + " 'Idea Factory, Compile Heart',\n", + " 'Kamui',\n", + " 'Ubisoft, Sensory Sweep',\n", + " 'War Drum Studios',\n", + " 'TOYBOX',\n", + " 'Sunrise Interactive',\n", + " 'Hijinx Studios',\n", + " 'Game Machine Studios',\n", + " 'Mastiff',\n", + " 'Gust',\n", + " 'Ritual Entertainment, 
Avalanche Software',\n", + " 'D3Publisher',\n", + " 'Psygnosis',\n", + " 'Armature Studio',\n", + " 'RedLynx',\n", + " 'Creat Studios',\n", + " 'Phoenix Games Studio',\n", + " 'Nintendo, Intelligent Systems',\n", + " 'Telegames',\n", + " 'Bluehole Studio',\n", + " 'Spike Chunsoft',\n", + " 'Red Storm Entertainment, Ubisoft Montreal',\n", + " 'Saber Interactive',\n", + " 'Idol Minds',\n", + " 'Maximum Games',\n", + " 'Knowledge Adventure Inc.',\n", + " 'Harmonix Music Systems, Demiurge Studios',\n", + " 'Shift',\n", + " 'Torus Games',\n", + " 'Triumph Studios',\n", + " 'DONTNOD Entertainment',\n", + " 'Twisted Pixel Games',\n", + " 'Zoo Games',\n", + " 'Zen Studios',\n", + " 'Victor Interactive Software',\n", + " 'Frogwares, Mistic Software',\n", + " 'Firebrand Games',\n", + " 'Prope',\n", + " 'Wow Entertainment',\n", + " 'Monolith Soft, Banpresto',\n", + " 'Sega Studios San Francisco',\n", + " 'Ganbarion',\n", + " 'Milestone S.r.l',\n", + " 'Red Tribe',\n", + " 'Red Zone Entertainment',\n", + " 'Amuze',\n", + " 'Uacari',\n", + " 'Bugbear',\n", + " 'Cyberlore Studios',\n", + " 'Lightning Fish Games',\n", + " 'Crafts & Meister',\n", + " 'Open Sesame',\n", + " '3DO',\n", + " 'Blueside',\n", + " 'Arcade Moon',\n", + " 'Noise Inc.',\n", + " '2K Games',\n", + " 'Psyonix',\n", + " 'Playbox Limited',\n", + " 'Takara Tomy',\n", + " 'KCEO',\n", + " 'Zombie Studios',\n", + " 'Bandai Namco Games, Artdink',\n", + " 'Pax Softonica',\n", + " 'Zener Works',\n", + " 'RED Entertainment, Spike Chunsoft',\n", + " 'Cattle Call',\n", + " 'Inland Productions',\n", + " 'Take-Two Interactive',\n", + " 'CokeM Interactive',\n", + " 'ITL',\n", + " 'Sandlot',\n", + " 'Hypnotix',\n", + " 'Vir2L Studios',\n", + " 'Airtight Games',\n", + " 'Deep Silver Dambuster Studios',\n", + " 'WildTangent',\n", + " 'Destination Software',\n", + " 'Brash Entertainment',\n", + " 'Haemimont',\n", + " 'High Voltage Software, Backbone Entertainment',\n", + " 'Nadeo',\n", + " 'Impulse Games',\n", + " 'Sennari 
Interactive',\n", + " 'Gas Powered Games',\n", + " 'Tantatus',\n", + " 'Starfish',\n", + " 'Buena Vista Interactive',\n", + " 'Cohort Studios',\n", + " 'Ubisoft, Powerhead Games',\n", + " 'EA Canada, EA Vancouver',\n", + " 'Inti Creates',\n", + " 'Fizz Factor',\n", + " 'Ion Storm',\n", + " 'SNK Playmore',\n", + " 'Smart Bomb Interactive',\n", + " 'Amaze Entertainment, Griptonite Games',\n", + " 'SunSoft',\n", + " 'Lancarse, Atlus',\n", + " 'Capcom, Klein Computer Entertainment',\n", + " 'Terminal Reality, 4mm Games',\n", + " 'AI',\n", + " 'Eidos Interactive',\n", + " 'Smack Down Productions',\n", + " 'Nobilis',\n", + " 'Polygon Magic',\n", + " 'Buzz Monkey, EA Montreal',\n", + " 'Atlus, Dingo Inc.',\n", + " 'Opus',\n", + " 'Venom Games',\n", + " 'Game Republic, JapanStudio',\n", + " 'CodeFire',\n", + " 'Phantagram',\n", + " 'Mercury Steam, Alchemic Productions',\n", + " 'SScholastic Media',\n", + " 'Access Games',\n", + " 'Image Epoch, Imageepoch',\n", + " 'Human Head Studios',\n", + " 'Big Red Button Entertainment',\n", + " 'Blue Byte',\n", + " 'Wildfire Studios',\n", + " 'MumboJumbo',\n", + " 'Sidhe Interactive',\n", + " 'Inti, Inti Creates',\n", + " 'Magix',\n", + " 'Mucky Foot Productions',\n", + " 'Denyu-Sha',\n", + " 'Ronin Entertainment',\n", + " 'Disney Interactive',\n", + " 'Artdink',\n", + " 'Shade',\n", + " 'Prolific',\n", + " 'SCE Studio Cambridge',\n", + " 'Sinister Games',\n", + " 'Banpresto, SoftMax',\n", + " 'Mahou',\n", + " 'Graphic State',\n", + " 'Wizarbox',\n", + " 'Hyperspace Cowgirls',\n", + " 'Related Designs',\n", + " 'The Workshop',\n", + " 'Neversoft Entertainment, Underground Development',\n", + " 'Simon & Schuster',\n", + " 'Banpresto',\n", + " 'DoubleTap Games',\n", + " 'Mistic Software',\n", + " 'IR Gurus',\n", + " 'LightBox Interactive',\n", + " 'Akella',\n", + " 'Game Brains',\n", + " 'Circus Freak',\n", + " 'Infinite Interactive',\n", + " 'Wargaming.net',\n", + " 'Game Titan',\n", + " 'Sacnoth',\n", + " 'Cave',\n", + " 'Sanuk 
Games',\n", + " 'Just Add Monsters',\n", + " 'Sims',\n", + " 'Volatile Games',\n", + " 'Media Vision',\n", + " 'Ignition Entertainment',\n", + " 'Zindagi Games',\n", + " 'Zoom',\n", + " 'Sensory Sweep',\n", + " 'Telltale Games',\n", + " 'Sega Toys',\n", + " 'Presto Studios',\n", + " 'Ornith',\n", + " 'Zoe Mode, HB Studios Multimedia',\n", + " 'Activision, FreeStyleGames',\n", + " 'Attention To Detail',\n", + " 'Crawfish Interactive',\n", + " 'Rockstar Toronto, Rockstar Leeds',\n", + " 'HotGen',\n", + " 'Amble',\n", + " 'Gusto Games',\n", + " 'GameMill Publishing, 1st Playable Productions',\n", + " 'The Learning Company',\n", + " 'Mac Play, MacPlay',\n", + " 'Ubisoft Vancouver',\n", + " 'Majesco Games',\n", + " 'TimeGate Studios',\n", + " 'Pacific Century Cyber Works',\n", + " 'LDW, Big Fish Games',\n", + " 'Rocksteady Studios, Virtuos',\n", + " 'Smilebit',\n", + " 'Blue Shift',\n", + " 'Rovio Mobile, Rovio Entertainment',\n", + " 'Metro',\n", + " 'iWin',\n", + " 'Treyarch, Shaba Games',\n", + " 'Prokion',\n", + " '2K Shanghai',\n", + " 'Artech Studios',\n", + " \"Crispy's\",\n", + " 'Left Field Productions',\n", + " 'Mitchell',\n", + " 'Activision Value',\n", + " 'Cyanide, Cyanide Studios',\n", + " 'AQ Interactive',\n", + " 'Starfish SD',\n", + " 'FuRyu',\n", + " 'Sony Interactive Entertainment',\n", + " 'InXile Entertainment',\n", + " '2015',\n", + " 'Tornado Studios',\n", + " 'Stainless Games',\n", + " 'ZootFly',\n", + " 'Empire Interactive',\n", + " 'Southend Interactive',\n", + " 'Amaze',\n", + " \"Y's K\",\n", + " 'Data Design Interactive',\n", + " 'Bec',\n", + " 'Io Interactive, Nixxes Software',\n", + " 'Handheld Games',\n", + " 'Tamsoft, D3Publisher',\n", + " 'Falcom',\n", + " 'Metro Graphics',\n", + " 'Lucky Chicken',\n", + " 'Matrix Software, Marvelous Entertainment',\n", + " 'Nautilus',\n", + " 'Hasbro Interactive',\n", + " 'Frogwares',\n", + " 'Oxygen Interactive',\n", + " 'Revolution Software',\n", + " 'Larian Studios',\n", + " 'Blueshift',\n", + " 
'Compile Heart',\n", + " 'SolWorks',\n", + " 'Magic Pixel Games',\n", + " 'Gratuitous Games',\n", + " 'Sanrio Digital',\n", + " 'Griffin International',\n", + " 'K2',\n", + " 'Hudson, NATSUME ATARI Inc.',\n", + " 'Jet Black Games',\n", + " 'Bits Studios',\n", + " 'Piranha Bytes',\n", + " 'Alphadream Corporation',\n", + " 'Tecmo Koei Games',\n", + " 'Phoenix Interactive Entertainment',\n", + " 'Tecmo, Ntreev Soft',\n", + " 'Monumental Games',\n", + " 'Climax Entertainment',\n", + " 'Aspect, Takara Tomy',\n", + " 'Secret Stash Games',\n", + " 'GungHo',\n", + " 'Beyond Reality',\n", + " 'Fall Line Studio',\n", + " 'SoftMax',\n", + " 'Bit Town',\n", + " 'Phoenix Entertainment',\n", + " 'Career Soft',\n", + " 'Ubisoft Sao Paulo',\n", + " 'Stealth Studios',\n", + " 'Mobius Entertainment',\n", + " 'Pseudo Interactive',\n", + " 'Rhino Studios',\n", + " 'Ivory Tower',\n", + " 'Backbone Vancouver',\n", + " 'Irem',\n", + " 'Kodiak Interactive',\n", + " 'Digital Fiction',\n", + " 'MASA Group',\n", + " 'Classic Media Distribution Limited',\n", + " 'Swordfish Studios',\n", + " 'ThatGameCompany',\n", + " 'Creative Patterns',\n", + " 'Osiris Studios',\n", + " 'Deep Silver, Sproing Interactive Media, Sproing',\n", + " 'Gunnar Games',\n", + " '2XL Games',\n", + " 'Super X Studios',\n", + " 'Jellyvision, Iron Galaxy Studios',\n", + " 'Bongfish',\n", + " 'Cranky Pants Games',\n", + " 'Sun-Tec',\n", + " 'Alpha Unit',\n", + " 'Gimagin Games',\n", + " 'Sanzaru Games, Sanzaru Games, Inc.',\n", + " 'CCP',\n", + " 'Hitmaker',\n", + " 'SCEA, Think and Feel',\n", + " 'MicroProse, n-Space',\n", + " 'Studio Saizensen',\n", + " 'Ubisoft Annecy',\n", + " 'Pyro Studios',\n", + " 'WideScreen Games',\n", + " 'Gaming Minds Studios',\n", + " 'Omega Force, Koei Canada',\n", + " 'Aspyr',\n", + " 'Cauldron Ltd.',\n", + " 'Sega, Tom Create',\n", + " 'Mad Catz',\n", + " 'Examu',\n", + " 'Merscom LLC',\n", + " 'Vivendi Games',\n", + " 'RFX Interactive',\n", + " 'FeelPlus',\n", + " 'Spiders',\n", + " 
'GCP1',\n", + " 'Nordcurrent',\n", + " 'Zono Inc.',\n", + " 'Software Creations',\n", + " 'Pixel Tales',\n", + " 'Magic Rings',\n", + " 'Artificial Mind and Movement, EA Redwood Shores',\n", + " 'Santa Cruz Games',\n", + " 'Seta Corporation',\n", + " 'BottleRocket Entertainment',\n", + " 'Skip Ltd.',\n", + " 'SME Dynamic Systems Limited',\n", + " 'Game Sauce',\n", + " 'Cinemaware',\n", + " 'Games Farm',\n", + " 'Ubisoft, Ludia Inc.',\n", + " 'Tecmo Koei Canada',\n", + " ...]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df=videoReview\n", + "df['Developer'].unique().tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(16719, 7)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformGenrePublisherUser_ScoreDeveloperRating
0Wii SportsWiiSportsNintendo8NintendoE
1Super Mario Bros.NESPlatformNintendoNaNNaNNaN
2Mario Kart WiiWiiRacingNintendo8.3NintendoE
3Wii Sports ResortWiiSportsNintendo8NintendoE
4Pokemon Red/Pokemon BlueGBRole-PlayingNintendoNaNNaNNaN
5TetrisGBPuzzleNintendoNaNNaNNaN
6New Super Mario Bros.DSPlatformNintendo8.5NintendoE
7Wii PlayWiiMiscNintendo6.6NintendoE
8New Super Mario Bros. WiiWiiPlatformNintendo8.4NintendoE
9Duck HuntNESShooterNintendoNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Name Platform Genre Publisher User_Score \\\n", + "0 Wii Sports Wii Sports Nintendo 8 \n", + "1 Super Mario Bros. NES Platform Nintendo NaN \n", + "2 Mario Kart Wii Wii Racing Nintendo 8.3 \n", + "3 Wii Sports Resort Wii Sports Nintendo 8 \n", + "4 Pokemon Red/Pokemon Blue GB Role-Playing Nintendo NaN \n", + "5 Tetris GB Puzzle Nintendo NaN \n", + "6 New Super Mario Bros. DS Platform Nintendo 8.5 \n", + "7 Wii Play Wii Misc Nintendo 6.6 \n", + "8 New Super Mario Bros. Wii Wii Platform Nintendo 8.4 \n", + "9 Duck Hunt NES Shooter Nintendo NaN \n", + "\n", + " Developer Rating \n", + "0 Nintendo E \n", + "1 NaN NaN \n", + "2 Nintendo E \n", + "3 Nintendo E \n", + "4 NaN NaN \n", + "5 NaN NaN \n", + "6 Nintendo E \n", + "7 Nintendo E \n", + "8 Nintendo E \n", + "9 NaN NaN " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#We don’t want to work with all the data in these examples. \n", + "#We just want to concentrate on the categorical variables. 
\n", + "#Let’s create a subset of the dataframe that contains these.\n", + "df = videoReview.select_dtypes(include=['object']).copy()\n", + "print(df.shape)\n", + "df.head(10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Name 0\n", + "Platform 0\n", + "Genre 0\n", + "Publisher 0\n", + "User_Score 0\n", + "Developer 0\n", + "Rating 0\n", + "dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Now do a little data clean up by removing NaN (nulls)\n", + "df.dropna(inplace=True)\n", + "df.isnull().sum()\n", + "#NB: Before removing NaN records you could clean the data and replace these with other values" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(9873, 7)\n" + ] + } + ], + "source": [ + "print(df.shape)\n", + "#Our data set has been reduced by approx 7,000 records!" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "#Question: After running all of this code, modify the above to do a better replacement of NaN \n", + "# and see what impact that has." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformGenrePublisherUser_ScoreDeveloperRating
count9873987398739873987398739873
unique602017123299616568
topMadden NFL 07PS2ActionElectronic ArtstbdUbisoftE
freq914782177113723742043949
\n", + "
" + ], + "text/plain": [ + " Name Platform Genre Publisher User_Score Developer \\\n", + "count 9873 9873 9873 9873 9873 9873 \n", + "unique 6020 17 12 329 96 1656 \n", + "top Madden NFL 07 PS2 Action Electronic Arts tbd Ubisoft \n", + "freq 9 1478 2177 1137 2374 204 \n", + "\n", + " Rating \n", + "count 9873 \n", + "unique 8 \n", + "top E \n", + "freq 3949 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Output some summary statistics for the data\n", + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countuniquetopfreq
Name98736020Madden NFL 079
Platform987317PS21478
Genre987312Action2177
Publisher9873329Electronic Arts1137
User_Score987396tbd2374
Developer98731656Ubisoft204
Rating98738E3949
\n", + "
" + ], + "text/plain": [ + " count unique top freq\n", + "Name 9873 6020 Madden NFL 07 9\n", + "Platform 9873 17 PS2 1478\n", + "Genre 9873 12 Action 2177\n", + "Publisher 9873 329 Electronic Arts 1137\n", + "User_Score 9873 96 tbd 2374\n", + "Developer 9873 1656 Ubisoft 204\n", + "Rating 9873 8 E 3949" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#transpose it to display differently\n", + "df.describe().transpose()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1sAAAKnCAYAAABwG1fTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAABMa0lEQVR4nO3de5RVdd0/8M+BGYb7JCAzjowKiVfQDMzAkvECaqGVJSqJ+IQoqSgPEEp2QR+FpaVg+FPMUExU/NXjpaxQLCUVL4CSooU3VFBGynCQi8Nt//7ox1mOA4owX4aR12utsxZn7+8+896HfS7v2ZfJZVmWBQAAAHWqUX0HAAAA+CxStgAAABJQtgAAABJQtgAAABJQtgAAABJQtgAAABJQtgAAABJQtgAAABIoqO8ADcWGDRvi7bffjlatWkUul6vvOAAAQD3Jsizef//9KCsri0aNNr//StnaQm+//XaUl5fXdwwAAGAHsWjRoujQocNm5ytbW6hVq1YR8Z8ntHXr1vWcBgAAqC/Lly+P8vLyfEfYHGVrC208dLB169bKFgAA8ImnF7lABgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQALKFgAAQAIF9R0A2PFMnjOkviPUMKj7pPqOAADwqdmzBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkEC9lq2//vWvccIJJ0RZWVnkcrm49957Nzv2nHPOiVwuFxMmTKgxvbq6OoYOHRrt2rWLFi1axIknnhiLFy+uMWbZsmUxYMCAKC4ujuLi4hgwYEC89957db9CAAAA/1+9lq2VK1fGwQcfHNddd93Hjrv33nvjqaeeirKyslrzhg0bFvfcc09MmzYtHnvssVixYkX07ds31q9fnx/Tv3//mDdvXkyfPj2mT58e8+bNiwEDBtT5+gAA
AGxUUJ8//Pjjj4/jjz/+Y8e89dZbcf7558cDDzwQX//612vMq6qqismTJ8dtt90WxxxzTERETJ06NcrLy+Ohhx6KY489Nv7+97/H9OnT48knn4zDDjssIiJuuumm6NGjRyxYsCD23XffNCsHAADs1Hboc7Y2bNgQAwYMiB/84Adx4IEH1po/d+7cWLt2bfTp0yc/raysLLp06RKzZs2KiIgnnngiiouL80UrIuLLX/5yFBcX58dsSnV1dSxfvrzGDQAAYEvt0GXryiuvjIKCgrjgggs2Ob+ysjKaNGkSu+yyS43pJSUlUVlZmR/Tvn37Wsu2b98+P2ZTxo0blz/Hq7i4OMrLy7dhTQAAgJ3NDlu25s6dG9dee21MmTIlcrncp1o2y7Iay2xq+Y+O+ajRo0dHVVVV/rZo0aJPlQEAANi57bBl69FHH42lS5fGHnvsEQUFBVFQUBBvvPFGjBgxIvbaa6+IiCgtLY01a9bEsmXLaiy7dOnSKCkpyY955513aj3+P//5z/yYTSkqKorWrVvXuAEAAGypHbZsDRgwIJ577rmYN29e/lZWVhY/+MEP4oEHHoiIiG7dukVhYWHMmDEjv9ySJUti/vz50bNnz4iI6NGjR1RVVcXTTz+dH/PUU09FVVVVfgwAAEBdq9erEa5YsSJeeeWV/P2FCxfGvHnzok2bNrHHHntE27Zta4wvLCyM0tLS/BUEi4uLY9CgQTFixIho27ZttGnTJkaOHBldu3bNX51w//33j+OOOy4GDx4cN954Y0REnH322dG3b19XIgQAAJKp17I1Z86cOPLII/P3hw8fHhERAwcOjClTpmzRY4wfPz4KCgqiX79+sXr16jj66KNjypQp0bhx4/yY22+/PS644IL8VQtPPPHET/zbXgAAANsil2VZVt8hGoLly5dHcXFxVFVVOX+Lz7zJc4bUd4QaBnWfVN8RAADytrQb7LDnbAEAADRkyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACyhYAAEACBfUdgO1vzrVD6jtCDd0vnFTfEQAAoM7ZswUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJCAsgUAAJBAvZatv/71r3HCCSdEWVlZ5HK5uPfee/Pz1q5dGxdddFF07do1WrRoEWVlZXHGGWfE22+/XeMxqqurY+jQodGuXbto0aJFnHjiibF48eIaY5YtWxYDBgyI4uLiKC4ujgEDBsR77723HdYQAADYWdVr2Vq5cmUcfPDBcd1119Wat2rVqnjmmWfixz/+cTzzzDNx9913x0svvRQnnnhijXHDhg2Le+65J6ZNmxaPPfZYrFixIvr27Rvr16/Pj+nfv3/Mmzcvpk+fHtOnT4958+bFgAEDkq8fAACw8yqozx9+/PHHx/HHH7/JecXFxTFjxowa0yZOnBhf+tKX4s0334w99tgjqqqqYvLkyXHbbbfFMcccExERU6dOjfLy8njooYfi2GOPjb///e8xffr0ePLJJ+Owww6LiIibbropevToEQsWLIh999037UoCAAA7pQZ1zlZVVVXkcrn43Oc+
FxERc+fOjbVr10afPn3yY8rKyqJLly4xa9asiIh44oknori4OF+0IiK+/OUvR3FxcX7MplRXV8fy5ctr3AAAALZUgylbH3zwQVx88cXRv3//aN26dUREVFZWRpMmTWKXXXapMbakpCQqKyvzY9q3b1/r8dq3b58fsynjxo3Ln+NVXFwc5eXldbg2AADAZ12DKFtr166NU089NTZs2BDXX3/9J47PsixyuVz+/of/vbkxHzV69OioqqrK3xYtWrR14QEAgJ3SDl+21q5dG/369YuFCxfGjBkz8nu1IiJKS0tjzZo1sWzZshrLLF26NEpKSvJj3nnnnVqP+89//jM/ZlOKioqidevWNW4AAABbaocuWxuL1ssvvxwPPfRQtG3btsb8bt26RWFhYY0LaSxZsiTmz58fPXv2jIiIHj16RFVVVTz99NP5MU899VRUVVXlxwAAANS1er0a4YoVK+KVV17J31+4cGHMmzcv2rRpE2VlZfGd73wnnnnmmbj//vtj/fr1+XOs2rRpE02aNIni4uIYNGhQjBgxItq2bRtt2rSJkSNHRteuXfNXJ9x///3juOOOi8GDB8eNN94YERFnn3129O3b15UIAQCAZOq1bM2ZMyeOPPLI/P3hw4dHRMTAgQNjzJgx8bvf/S4iIr7whS/UWO7hhx+OioqKiIgYP358FBQURL9+/WL16tVx9NFHx5QpU6Jx48b58bfffntccMEF+asWnnjiiZv8214AAAB1pV7LVkVFRWRZttn5Hzdvo6ZNm8bEiRNj4sSJmx3Tpk2bmDp16lZlBAAA2Bo79DlbAAAADZWyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkICyBQAAkEBBfQeALTXn/iH1HaGG7n0n1XcEAAB2YPZsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJKBsAQAAJFCvZeuvf/1rnHDCCVFWVha5XC7uvffeGvOzLIsxY8ZEWVlZNGvWLCoqKuKFF16oMaa6ujqGDh0a7dq1ixYtWsSJJ54YixcvrjFm2bJlMWDAgCguLo7i4uIYMGBAvPfee4nXDgAA2JnVa9lauXJlHHzwwXHddddtcv5VV10V11xzTVx33XUxe/bsKC0tjd69e8f777+fHzNs2LC45557Ytq0afHYY4/FihUrom/fvrF+/fr8mP79+8e8efNi+vTpMX369Jg3b14MGDAg+foBAAA7r4L6/OHHH398HH/88Zucl2VZTJgwIS655JI46aSTIiLi1ltvjZKSkrjjjjvinHPOiaqqqpg8eXLcdtttccwxx0RExNSpU6O8vDweeuihOPbYY+Pvf/97TJ8+PZ588sk47LDDIiLipptuih49esSCBQti33333T4rCwAA7FR22HO2Fi5cGJWVldGnT5/8tKKioujVq1fMmjUrIiLmzp0ba9eurTGmrKwsunTpkh/zxBNPRHFxcb5oRUR8+ctfjuLi4vyYTamuro7ly5fXuAEAAGypHbZsVVZWRkRESUlJjeklJSX5eZWVldGkSZPYZZddPnZM+/btaz1++/bt
82M2Zdy4cflzvIqLi6O8vHyb1gcAANi51OthhFsil8vVuJ9lWa1pH/XRMZsa/0mPM3r06Bg+fHj+/vLly2sVrjkXDPnYHPWh+y8m1XcEAAAgduA9W6WlpRERtfY+LV26NL+3q7S0NNasWRPLli372DHvvPNOrcf/5z//WWuv2YcVFRVF69ata9wAAAC21A5btjp27BilpaUxY8aM/LQ1a9bEzJkzo2fPnhER0a1btygsLKwxZsmSJTF//vz8mB49ekRVVVU8/fTT+TFPPfVUVFVV5ccAAADUtXo9jHDFihXxyiuv5O8vXLgw5s2bF23atIk99tgjhg0bFmPHjo3OnTtH586dY+zYsdG8efPo379/REQUFxfHoEGDYsSIEdG2bdto06ZNjBw5Mrp27Zq/OuH+++8fxx13XAwePDhuvPHGiIg4++yzo2/fvq5ECAAAJFOvZWvOnDlx5JFH5u9vPEdq4MCBMWXKlBg1alSsXr06zj333Fi2bFkcdthh8eCDD0arVq3yy4wfPz4KCgqiX79+sXr16jj66KNjypQp0bhx4/yY22+/PS644IL8VQtPPPHEzf5tLwAAgLpQr2WroqIisizb7PxcLhdjxoyJMWPGbHZM06ZNY+LEiTFx4sTNjmnTpk1MnTp1W6ICAAB8KjvsOVsAAAANmbIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQQEF9B4DPskfmDKnvCLVUdJ9U3xEAAHYK9mwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAksFVl66ijjor33nuv1vTly5fHUUcdta2ZAAAAGrytKluPPPJIrFmzptb0Dz74IB599NFtDgUAANDQFXyawc8991z+3y+++GJUVlbm769fvz6mT58eu+++e92lAwAAaKA+1Z6tL3zhC3HIIYdELpeLo446Kr7whS/kb926dYvLL788fvKTn9RZuHXr1sWPfvSj6NixYzRr1iw6deoUl112WWzYsCE/JsuyGDNmTJSVlUWzZs2ioqIiXnjhhRqPU11dHUOHDo127dpFixYt4sQTT4zFixfXWU4AAICP+lR7thYuXBhZlkWnTp3i6aefjl133TU/r0mTJtG+ffto3LhxnYW78sorY9KkSXHrrbfGgQceGHPmzIn/+q//iuLi4rjwwgsjIuKqq66Ka665JqZMmRL77LNPXH755dG7d+9YsGBBtGrVKiIihg0bFr///e9j2rRp0bZt2xgxYkT07ds35s6dW6d5AQAANvpUZWvPPfeMiKixZymlJ554Ir7xjW/E17/+9YiI2GuvveLOO++MOXPmRMR/9mpNmDAhLrnkkjjppJMiIuLWW2+NkpKSuOOOO+Kcc86JqqqqmDx5ctx2221xzDHHRETE1KlTo7y8PB566KE49thjt8u6AAAAO5dPVbY+7KWXXopHHnkkli5dWqt81dWhhF/5yldi0qRJ8dJLL8U+++wTf/vb3+Kxxx6LCRMmRMR/9rRVVlZGnz598ssUFRVFr169YtasWXHOOefE3LlzY+3atTXGlJWVRZcuXWLWrFmbLVvV1dVRXV2dv798
+fI6WScAAGDnsFVl66abborvf//70a5duygtLY1cLpefl8vl6qxsXXTRRVFVVRX77bdfNG7cONavXx9XXHFFnHbaaRER+Qt0lJSU1FiupKQk3njjjfyYJk2axC677FJrzIcv8PFR48aNi0svvbRO1gMAANj5bFXZuvzyy+OKK66Iiy66qK7z1HDXXXfF1KlT44477ogDDzww5s2bF8OGDYuysrIYOHBgftyHy17Efw4v/Oi0j/qkMaNHj47hw4fn7y9fvjzKy8u3ck0AAICdzVaVrWXLlsXJJ59c11lq+cEPfhAXX3xxnHrqqRER0bVr13jjjTdi3LhxMXDgwCgtLY2I/+y92m233fLLLV26NL+3q7S0NNasWRPLli2rsXdr6dKl0bNnz83+7KKioigqKkqxWgAAwE5gq/6o8cknnxwPPvhgXWepZdWqVdGoUc2IjRs3zp8j1rFjxygtLY0ZM2bk569ZsyZmzpyZL1LdunWLwsLCGmOWLFkS8+fP/9iyBQAAsC22as/W3nvvHT/+8Y/jySefjK5du0ZhYWGN+RdccEGdhDvhhBPiiiuuiD322CMOPPDAePbZZ+Oaa66J733vexHxn8MHhw0bFmPHjo3OnTtH586dY+zYsdG8efPo379/REQUFxfHoEGDYsSIEdG2bdto06ZNjBw5Mrp27Zq/OiEAAEBd26qy9ctf/jJatmwZM2fOjJkzZ9aYl8vl6qxsTZw4MX784x/HueeeG0uXLo2ysrI455xzalyAY9SoUbF69eo499xzY9myZXHYYYfFgw8+mP8bWxER48ePj4KCgujXr1+sXr06jj766JgyZYq/sQUAACSzVWVr4cKFdZ1jk1q1ahUTJkzIX+p9U3K5XIwZMybGjBmz2TFNmzaNiRMnxsSJE+s+JAAAwCZs1TlbAAAAfLyt2rO18Zypzbn55pu3KgwAAMBnxVZf+v3D1q5dG/Pnz4/33nsvjjrqqDoJBgAA0JBtVdm65557ak3bsGFDnHvuudGpU6dtDgUAANDQ1dk5W40aNYr//u//jvHjx9fVQwIAADRYdXqBjFdffTXWrVtXlw8JAADQIG3VYYTDhw+vcT/LsliyZEn84Q9/iIEDB9ZJMAAAgIZsq8rWs88+W+N+o0aNYtddd42rr776E69UCAAAsDPYqrL18MMP13UOAACAz5StKlsb/fOf/4wFCxZELpeLffbZJ3bddde6ygUARMSQ++fUd4QaJvXtXt8RABqMrbpAxsqVK+N73/te7LbbbnHEEUfEV7/61SgrK4tBgwbFqlWr6jojAABAg7NVZWv48OExc+bM+P3vfx/vvfdevPfee3HffffFzJkzY8SIEXWdEQAAoMHZqsMI//d//zd++9vfRkVFRX7a1772tWjWrFn069cvbrjhhrrKBwAA0CBt1Z6tVatWRUlJSa3p7du3dxghAABAbGXZ6tGjR/z0pz+NDz74ID9t9erVcemll0aPHj3qLBwAAEBDtVWHEU6YMCGOP/746NChQxx88MGRy+Vi3rx5UVRUFA8++GBdZwQAAGhwtqpsde3aNV5++eWYOnVq/OMf/4gsy+LUU0+N7373u9GsWbO6zggAANDgbFXZGjduXJSUlMTgwYNrTL/55pvjn//8Z1x00UV1Eg4AAKCh2qpztm688cbYb7/9ak0/8MADY9KkSdscCgAAoKHbqrJVWVkZu+22W63pu+66ayxZsmSbQwEAADR0W3UYYXl5eTz++OPRsWPHGtMff/zxKCsrq5NgAFCXhsx5pL4j1DKpe0V9RwAgoa0qW2eddVYMGzYs1q5dG0cddVRERPz5z3+OUaNGxYgRI+o0IAAAQEO0VWVr1KhR8e9//zvOPffcWLNmTURENG3aNC666KIYPXp0nQYEAABoiLaqbOVyubjyyivjxz/+cfz973+PZs2aRefOnaOoqKiu8wEAADRIW1W2NmrZsmUceuihdZUFAADgM2OrrkYIAADAx1O2AAAAElC2
AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAEtjhy9Zbb70Vp59+erRt2zaaN28eX/jCF2Lu3Ln5+VmWxZgxY6KsrCyaNWsWFRUV8cILL9R4jOrq6hg6dGi0a9cuWrRoESeeeGIsXrx4e68KAACwE9mhy9ayZcvi8MMPj8LCwvjTn/4UL774Ylx99dXxuc99Lj/mqquuimuuuSauu+66mD17dpSWlkbv3r3j/fffz48ZNmxY3HPPPTFt2rR47LHHYsWKFdG3b99Yv359PawVAACwMyio7wAf58orr4zy8vK45ZZb8tP22muv/L+zLIsJEybEJZdcEieddFJERNx6661RUlISd9xxR5xzzjlRVVUVkydPjttuuy2OOeaYiIiYOnVqlJeXx0MPPRTHHnvsdl0nAABg57BD79n63e9+F927d4+TTz452rdvH4ccckjcdNNN+fkLFy6MysrK6NOnT35aUVFR9OrVK2bNmhUREXPnzo21a9fWGFNWVhZdunTJjwEAAKhrO3TZeu211+KGG26Izp07xwMPPBBDhgyJCy64IH79619HRERlZWVERJSUlNRYrqSkJD+vsrIymjRpErvssstmx2xKdXV1LF++vMYNAABgS+3QhxFu2LAhunfvHmPHjo2IiEMOOSReeOGFuOGGG+KMM87Ij8vlcjWWy7Ks1rSP+qQx48aNi0svvXQb0gMAADuzHXrP1m677RYHHHBAjWn7779/vPnmmxERUVpaGhFRaw/V0qVL83u7SktLY82aNbFs2bLNjtmU0aNHR1VVVf62aNGibV4fAABg57FDl63DDz88FixYUGPaSy+9FHvuuWdERHTs2DFKS0tjxowZ+flr1qyJmTNnRs+ePSMiolu3blFYWFhjzJIlS2L+/Pn5MZtSVFQUrVu3rnEDAADYUjv0YYT//d//HT179oyxY8dGv3794umnn45f/vKX8ctf/jIi/nP44LBhw2Ls2LHRuXPn6Ny5c4wdOzaaN28e/fv3j4iI4uLiGDRoUIwYMSLatm0bbdq0iZEjR0bXrl3zVycEAACoazt02Tr00EPjnnvuidGjR8dll10WHTt2jAkTJsR3v/vd/JhRo0bF6tWr49xzz41ly5bFYYcdFg8++GC0atUqP2b8+PFRUFAQ/fr1i9WrV8fRRx8dU6ZMicaNG9fHagEAADuBHbpsRUT07ds3+vbtu9n5uVwuxowZE2PGjNnsmKZNm8bEiRNj4sSJCRICAADUtkOfswUAANBQKVsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJFNR3AADgs2XItXPqO0Itky7sXt8RgJ2QPVsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJFNR3AAAAdi53DZlT3xFqOGVS9/qOwGeUPVsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJKFsAAAAJFNR3AAAAts7YIXPqO0ItP5zUvb4jwA7Dni0AAIAElC0A
AIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAElC0AAIAEGlTZGjduXORyuRg2bFh+WpZlMWbMmCgrK4tmzZpFRUVFvPDCCzWWq66ujqFDh0a7du2iRYsWceKJJ8bixYu3c3oAAGBn0mDK1uzZs+OXv/xlHHTQQTWmX3XVVXHNNdfEddddF7Nnz47S0tLo3bt3vP/++/kxw4YNi3vuuSemTZsWjz32WKxYsSL69u0b69ev396rAQAA7CQaRNlasWJFfPe7342bbropdtlll/z0LMtiwoQJcckll8RJJ50UXbp0iVtvvTVWrVoVd9xxR0REVFVVxeTJk+Pqq6+OY445Jg455JCYOnVqPP/88/HQQw/V1yoBAACfcQ2ibJ133nnx9a9/PY455pga0xcuXBiVlZXRp0+f/LSioqLo1atXzJo1KyIi5s6dG2vXrq0xpqysLLp06ZIfsynV1dWxfPnyGjcAAIAtVVDfAT7JtGnT4plnnonZs2fXmldZWRkRESUlJTWml5SUxBtvvJEf06RJkxp7xDaO2bj8powbNy4uvfTSbY0PAADspHboPVuLFi2KCy+8MKZOnRpNmzbd7LhcLlfjfpZltaZ91CeNGT16dFRVVeVvixYt+nThAQCAndoOXbbmzp0bS5cujW7dukVBQUEUFBTEzJkz4xe/+EUUFBTk92h9dA/V0qVL8/NKS0tjzZo1sWzZss2O2ZSioqJo3bp1jRsAAMCW2qHL1tFHHx3PP/98zJs3L3/r3r17fPe734158+ZFp06dorS0NGbMmJFfZs2aNTFz5szo2bNnRER069YtCgsLa4xZsmRJzJ8/Pz8GAACgru3Q52y1atUqunTpUmNaixYtom3btvnpw4YNi7Fjx0bnzp2jc+fOMXbs2GjevHn0798/IiKKi4tj0KBBMWLEiGjbtm20adMmRo4cGV27dq11wQ0AAIC6skOXrS0xatSoWL16dZx77rmxbNmyOOyww+LBBx+MVq1a5ceMHz8+CgoKol+/frF69eo4+uijY8qUKdG4ceN6TA4AAHyWNbiy9cgjj9S4n8vlYsyYMTFmzJjNLtO0adOYOHFiTJw4MW04AACA/2+HPmcLAACgoVK2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAEiio7wAAO7Mhc35Y3xFqmNR9bH1HAIDPDHu2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAElC2AAAAEiio7wAAdeGHc4bUd4RaxnafVN8RAIB6ZM8WAABAAsoWAABAAsoWAABAAsoWAABAAsoWAABAAjt02Ro3blwceuih0apVq2jfvn1885vfjAULFtQYk2VZjBkzJsrKyqJZs2ZRUVERL7zwQo0x1dXVMXTo0GjXrl20aNEiTjzxxFi8ePH2XBUAAGAns0OXrZkzZ8Z5550XTz75ZMyYMSPWrVsXffr0iZUrV+bHXHXVVXHNNdfEddddF7Nnz47S0tLo
3bt3vP/++/kxw4YNi3vuuSemTZsWjz32WKxYsSL69u0b69evr4/VAgAAdgI79N/Zmj59eo37t9xyS7Rv3z7mzp0bRxxxRGRZFhMmTIhLLrkkTjrppIiIuPXWW6OkpCTuuOOOOOecc6KqqiomT54ct912WxxzzDERETF16tQoLy+Phx56KI499tjtvl4AAMBn3w69Z+ujqqqqIiKiTZs2ERGxcOHCqKysjD59+uTHFBUVRa9evWLWrFkRETF37txYu3ZtjTFlZWXRpUuX/JhNqa6ujuXLl9e4AQAAbKkdes/Wh2VZFsOHD4+vfOUr0aVLl4iIqKysjIiIkpKSGmNLSkrijTfeyI9p0qRJ7LLLLrXGbFx+U8aNGxeXXnppXa4CwGfCkDmT6ztCLZO6D6rvCABQS4PZs3X++efHc889F3feeWeteblcrsb9LMtqTfuoTxozevToqKqqyt8WLVq0dcEBAICdUoMoW0OHDo3f/e538fDDD0eHDh3y00tLSyMiau2hWrp0aX5vV2lpaaxZsyaWLVu22TGbUlRUFK1bt65xAwAA2FI7dNnKsizOP//8uPvuu+Mvf/lLdOzYscb8jh07RmlpacyYMSM/bc2aNTFz5szo2bNnRER069YtCgsLa4xZsmRJzJ8/Pz8GAACgru3Q52ydd955cccdd8R9990XrVq1yu/BKi4ujmbNmkUul4thw4bF2LFjo3PnztG5c+cYO3ZsNG/ePPr3758fO2jQoBgxYkS0bds22rRpEyNHjoyuXbvmr04IAABQ13bosnXDDTdERERFRUWN6bfcckuceeaZERExatSoWL16dZx77rmxbNmyOOyww+LBBx+MVq1a5cePHz8+CgoKol+/frF69eo4+uijY8qUKdG4cePttSoAAMBOZocuW1mWfeKYXC4XY8aMiTFjxmx2TNOmTWPixIkxceLEOkwHAMDOZM6QOfUdoYbuk7rXdwQ+wQ59zhYAAEBDpWwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkoGwBAAAkUFDfAQAAgDTmDLmrviPU0n3SKfUdYbuxZwsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACCBgvoOAACwIxhywZz6jlDDpF90r+8IwDayZwsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACABZQsAACCBgvoOAAAA8GFzhlxR3xFq6D7pkq1azp4tAACABHaqsnX99ddHx44do2nTptGtW7d49NFH6zsSAADwGbXTlK277rorhg0bFpdcckk8++yz8dWvfjWOP/74ePPNN+s7GgAA8Bm005Sta665JgYNGhRnnXVW7L///jFhwoQoLy+PG264ob6jAQAAn0E7xQUy1qxZE3Pnzo2LL764xvQ+ffrErFmzNrlMdXV1VFdX5+9XVVVFRMTy5cvz01asWZMg7bb5cL7NWfHBjpV7SzJHRKxY1fByr1yxY2WO2LLcq3ew3FuSuXoHyxyxZbnXrKj+xDHb05ZlXr0dknw6W5Z75XZI8ulsUe5VK7ZDki23RZk/2LEyR2xh7jU7Vu4tyfzBDpY5Ystyr9rBcm/xd5EGmHvFmlXbIcmns2W5P9gOSbbcRzNvvJ9l2ccu
l8s+acRnwNtvvx277757PP7449GzZ8/89LFjx8att94aCxYsqLXMmDFj4tJLL92eMQEAgAZk0aJF0aFDh83O3yn2bG2Uy+Vq3M+yrNa0jUaPHh3Dhw/P39+wYUP8+9//jrZt2252ma21fPnyKC8vj0WLFkXr1q3r9LFTaYiZIxpm7oaYOULu7akhZo5omLkbYuaIhpm7IWaOkHt7aoiZIxpm7oaYOSJt7izL4v3334+ysrKPHbdTlK127dpF48aNo7Kyssb0pUuXRklJySaXKSoqiqKiohrTPve5z6WKGBERrVu3blAbcETDzBzRMHM3xMwRcm9PDTFzRMPM3RAzRzTM3A0xc4Tc21NDzBzRMHM3xMwR6XIXFxd/4pid4gIZTZo0iW7dusWMGTNqTJ8xY0aNwwoBAADqyk6xZysiYvjw4TFgwIDo3r179OjRI375y1/Gm2++GUOGDKnvaAAAwGfQTlO2TjnllHj33XfjsssuiyVLlkSXLl3ij3/8Y+y55571HS2Kioripz/9aa3DFndkDTFzRMPM3RAzR8i9PTXEzBENM3dDzBzRMHM3xMwRcm9PDTFzRMPM3RAzR+wYuXeKqxECAABsbzvFOVsAAADbm7IFAACQgLIFAACQgLK1g3nkkUcil8vFe++9FxERU6ZMSf73vQCAbeczHPgoZSuhSZMmRatWrWLdunX5aStWrIjCwsL46le/WmPso48+GrlcLsrKymLJkiX5P5J2yimnxEsvvbRdc2905plnRi6Xi1wuF4WFhVFSUhK9e/eOm2++OTZs2JAf9+yzz0bfvn2jffv20bRp09hrr73ilFNOiX/96191mmf9+vXRs2fP+Pa3v11jelVVVZSXl8ePfvSjePfdd+O4446LsrKyKCoqivLy8jj//PNj+fLlNZbJsix+/vOfxz777JMfN3bs2BpjZs6cGd26dYumTZtGp06dYtKkSXW6Ph99fjt16hQjR46MlStXRkTE//7v/8Zhhx0WxcXF0apVqzjwwANjxIgR+eUfe+yxOPzww6Nt27bRrFmz2G+//WL8+PF1mrGuM999993Ru3fv2HXXXaN169bRo0ePeOCBB5LlveGGG+Kggw7K/zHDHj16xJ/+9Kf8/IqKivz6FBUVxe677x4nnHBC3H333bUe6+GHH44jjzwy2rRpE82bN4/OnTvHwIEDa7y+69K2Ptcf9vjjj0dBQUF84QtfSJL149TlemwPW/I+8/rrr+fXKZfLRZMmTWLvvfeOyy+/POrzmlOf9FxH/Of5rqioiOLi4mjZsmUcdNBBcdlll8W///3vHSbnJ20TU6ZMqfH877bbbtGvX79YuHBhneZM8Rk+ZsyYTb4O33vvvcjlcvHII4/U6Tp8VGVlZVx44YWx9957R9OmTaOkpCS+8pWvxKRJk2LVqlX5cc8++2ycfPLJUVJSEk2bNo199tknBg8evF2+j2xJxr322iv//9+4ceMoKyuLQYMGxbJlyzb5mPvuu280adIk3nrrrW3OtyXvERER999/f1RUVESrVq2iefPmceihh8aUKVO2+efXpW15LV599dVRXFxcY7vZ6IMPPojPfe5zcc0112z3ddjcd9WIetyuM5L5xz/+kUVE9sQTT+Sn/fGPf8w6dOiQFRUVZStXrsxPv+yyy7KysrL6iLlZAwcOzI477rhsyZIl2eLFi7O5c+dmV1xxRdayZcvs+OOPz9auXZu98847WZs2bbKBAwdmzzzzTPbaa69lf/7zn7MLL7wwe+ONN+o800svvZQ1b948mzp1an7agAEDsoMOOiirrq7O/v3vf2fXX399Nnv27Oz111/PHnrooWzffffNTjvttBqPM3To0GzffffN7rvvvuy1117Lnn322WzGjBn5+a+99lrWvHnz7MILL8xefPHF7KabbsoKCwuz3/72t3W2Lh9+ft98883s9ttvz5o1a5YNGTIkmzFjRlZQUJBdddVV2T/+8Y9swYIF2T333JOdf/75
+eWfeeaZ7I477sjmz5+fLVy4MLvtttuy5s2bZzfeeGOdZazrzBdeeGF25ZVXZk8//XT20ksvZaNHj84KCwuzZ555Jkne3/3ud9kf/vCHbMGCBdmCBQuyH/7wh1lhYWE2f/78LMuyrFevXtngwYPz6/PEE09ko0aNygoLC7PBgwfnH2f+/PlZUVFR9oMf/CB7/vnns1deeSX705/+lA0aNCirrq5Okn1bn+uN3nvvvaxTp05Znz59soMPPjhJ1u2xHtvTJ73PLFy4MIuI7KGHHsqWLFmSvf7669nUqVOzpk2bZr/61a/qLffHPddZlmU//OEPs8aNG2cjR47MHn/88WzhwoXZgw8+mJ100knZhAkTdoicW7JN3HLLLVnr1q2zJUuWZG+//Xb2l7/8JevcuXPWpUuXbN26dXWWM8Vn+E9/+tNNvg6XLVuWRUT28MMP10X0TXr11Vez0tLSbL/99svuuuuu7MUXX8yee+657Le//W32ta99LbvvvvuyLMuy3//+91mTJk2yE044IZsxY0b22muvZU8++WQ2YsSIrF+/fsnyfZqMe+65Z3bZZZflv5/85S9/yfbee+/s9NNPr/WYjz76aLbHHntk/fv3zy6//PI6yflJ7xG/+MUvskaNGmWjR4/OXnjhhezll1/Ofv7zn2dFRUXZiBEj6iRDXdiW1+LSpUuzwsLC7Ne//nWtx506dWpWWFiYLV26dLuuw+a+q2ZZ/W7XylZiZWVl2bhx4/L3R40alZ133nnZAQccUOPL/VFHHZV997vfzR5++OEsIrJly5ZlWfafD5Xi4uLtnPo/Bg4cmH3jG9+oNf3Pf/5zFhHZTTfdlN1zzz1ZQUFBfmPeHq699tpsl112yd56663s3nvvzQoLC7Nnn332Y8d36NAhf//FF1/MCgoKsn/84x+bXWbUqFHZfvvtV2PaOeeck335y1/e5vwbber5Peuss7LS0tLswgsvzCoqKj71Y37rW9/a5IdNXUmR+YADDsguvfTSOkr4yXbZZZf8F+JevXplF154Ya0xN998cxYR+dfo+PHjs7322mu7ZcyyunuuTznllOxHP/rRZr/kpZZim9kePu59ZmPZ+uj7zlFHHZWde+652z/s//dxz/VTTz2VRcRmS9XGz5ztYVu3iU19Lk6dOjWLiI99X98adf0ZXp9l69hjj806dOiQrVixYpPzN2zYkK1cuTJr165d9s1vfnOTY1JvJ1uSMcv+U7bGjx9fY95ll12WHXDAAbWWOfPMM7OLL744+9Of/pR16tQp/xjbanPvEW+++WZWWFiYDR8+vNYyv/jFL7KIyJ588sksy7L89vLQQw9l3bp1y5o1a5b16NGj1nb8u9/9LvviF7+YFRUVZR07dszGjBlTJ9+7tvW1eNJJJ21yzFFHHZV9+9vf3uZ8W2JLvqvW93btMMLEKioq4uGHH87ff/jhh6OioiJ69eqVn75mzZp44okn4sgjj6yvmJ/KUUcdFQcffHDcfffdUVpaGuvWrYt77rlnux0+M3To0Dj44IPjjDPOiLPPPjt+8pOfbPbwqLfffjvuvvvu6NWrV37a73//++jUqVPcf//90bFjx9hrr73irLPOqnEYzRNPPBF9+vSp8VjHHntszJkzJ9auXZtkvSIimjVrFmvXro3S0tJ44YUXYv78+Vu87LPPPhuzZs2qsa7bw7Zk3rBhQ7z//vvRpk2bhAn/Y/369TFt2rRYuXJl9OjR42PHDhw4MHbZZZf84YSlpaWxZMmS+Otf/5o858f5tM/1LbfcEq+++mr89Kc/3U4Jt8y2bDPby6d5n4mImDNnTjzzzDNx2GGHbb+QW2Djc3377bdHy5Yt49xzz93kuPo+r2hbt4lmzZpFRNT5+/Nn5TP83XffjQcffDDOO++8aNGixSbH5HK5eOCBB+Jf//pXjBo1apNjUm4nW5pxU9566624//77a73+3n///fjNb34Tp59+evTu3TtWrlxZZ4dqbu494re/
/W2sXbs2Ro4cWWuZc845J1q2bBl33nlnjemXXHJJXH311TFnzpwoKCiI733ve/l5DzzwQJx++ulxwQUXxIsvvhg33nhjTJkyJa644oo6WY+P+jSvxUGDBsXMmTNrHML7+uuvx8MPPxyDBg1Kkm9Lffi7an1u1xHO2UquoqIiHn/88Vi3bl28//778eyzz8YRRxwRvXr1yr/gn3zyyVi9evUO/Ub9Ufvtt1+8/vrr8eUvfzl++MMfRv/+/aNdu3Zx/PHHx89+9rN45513kv3sXC4XN9xwQ/z5z3+OkpKSuPjii2uNOe2006J58+ax++67R+vWreNXv/pVft5rr70Wb7zxRvzmN7+JX//61zFlypSYO3dufOc738mPqaysjJKSkhqPWVJSEuvWravzc9E2evrpp+OOO+6Io48+OoYOHRqHHnpodO3aNfbaa6849dRT4+abb47q6upay3Xo0CGKioqie/fucd5558VZZ52VJF9dZt7o6quvjpUrV0a/fv2SZXz++eejZcuWUVRUFEOGDIl77rknDjjggI9dplGjRrHPPvvE66+/HhERJ598cpx22mnRq1ev2G233eJb3/pWXHfddbXOBUzp0z7XL7/8clx88cVx++23R0FBwXbL+Um2dZvZXrbkfaZnz57RsmXLaNKkSRx66KHRr1+/OOOMM+oh7aZ9+Ll++eWXo1OnTlFYWFjfsWrZ1m1i8eLF8bOf/Sw6dOgQ++yzT51m+6x8hr/yyiuRZVnsu+++Naa3a9cuWrZsGS1btoyLLrooXn755Yj4z2f8jppxo4suuihatmwZzZo1iw4dOkQul6t1jtC0adOic+fOceCBB0bjxo3j1FNPjcmTJ9dJ3s29R7z00ktRXFwcu+22W61lmjRpEp06dap1jtAVV1wRvXr1igMOOCAuvvjimDVrVnzwwQf5eRdffHEMHDgwOnXqFL17947/+Z//iRtvvLFO1uPDPu1r8dhjj42ysrIa56LdcsstUVZWVusX1vVh43fV+tyuI5St5I488shYuXJlzJ49Ox599NHYZ599on379tGrV6+YPXt2/rcse+yxR3Tq1Km+426xLMvyv2G64oororKyMiZNmhQHHHBATJo0Kfbbb794/vnnk/38m2++OZo3bx4LFy6MxYsX15o/fvz4eOaZZ+Lee++NV199NYYPH56ft2HDhqiuro5f//rX8dWvfjUqKipi8uTJ8fDDD8eCBQvy4z76G7SNe+4295u1rXH//fdHy5Yto2nTptGjR4844ogjYuLEidGiRYv4wx/+EK+88kr86Ec/ipYtW8aIESPiS1/6Uq2TUR999NGYM2dOTJo0KSZMmFDrN2Z1rS4yR0TceeedMWbMmLjrrruiffv2yfLuu+++MW/evHjyySfj+9//fgwcODBefPHFT1zuw9t448aN45ZbbonFixfHVVddFWVlZXHFFVfEgQceGEuWLEmWfWuf6/Xr10f//v3j0ksvrfMvn9tzPerbJ73P3HXXXTFv3rz429/+FnfddVfcd999myxl29PmnusPb887gm3dJqqqqqJly5bRokWLKC8vjzVr1sTdd98dTZo0qdOcn7XP8I9uA08//XTMmzcvDjzwwKiurq7XC7xs9EkZN/rBD34Q8+bNi+eeey7+/Oc/R0TE17/+9Vi/fn1+zOTJk+P000/P3z/99NPj7rvvzl8tclt90nvEpmzqtXjQQQfl/72xpC1dujQiIubOnRuXXXZZvnC2bNkyBg8eHEuWLKmT98lteS02btw4Bg4cGFOmTIkNGzZElmVx6623xplnnhmNGzfe5mzbauNzXe/bddKDFMmyLMs6dOiQXXHFFdnIkSOz73//+/np++yzT/bAAw9kFRUV2ZlnnpllWdYgztnKsizr2rVr9vWvf32T86qrq7MDDjggO+OMM5LkmjVrVlZQUJDNmDEj6927d3bUUUd97DHYjz76aBYR2dtvv51lWZb95Cc/yQoKCmqMWbVqVRYR
2YMPPphlWZZ99atfzS644IIaY+6+++6soKAgW7NmTZ2sx8CBA7Njjjkme/nll7PXX3/9Ex/3tddeywoKCrKbb755s2P+53/+J9tnn33qJN+m1FXmadOmZc2aNcvuv//+ZFk35+ijj87OPvvsLMs2f87WunXrsl122SU777zzNvs4//73v7N27dplP/nJT5Lk3JbneuP5H40bN87fcrlcftqf//znJJk3JcV2vj183PvM5s7ZGjduXFZQUJCtXr26HhJ//HN9wQUXZC1btqyz969tsa3bxC233JK1atUqe/nll7NXX311s+f31JW6/Ay/+uqrN3n+58ZtKtXFgv71r39luVyuxvlnH7bxvfDuu+/OIiKbNWtWkhwfZ0szZtmmz9l64oknapxr+8ILL2QRkTVq1KjGe2FEZNdff/02593ce8Q111yTRUT21ltv1Vqmuro6a9GiRX49Prq9ZFmWPfvss1lEZAsXLsyyLMuaNm2aXXnlldnLL79c67Z+/fptWoe6eH9+5ZVXslwul82YMSObMWNGlsvlsldeeWWbcn0aW/JdtT636yxzztZ2ceSRR8YjjzwSjzzySFRUVOSn9+rVKx544IF48sknd+jDDz7qL3/5Szz//PO1Lnu6UZMmTeLzn/98jcsN15XVq1fHwIED45xzzoljjjkmfvWrX8Xs2bM/dnd69v9/o7HxN2KHH354rFu3Ll599dX8mI279Pfcc8+IiOjRo0fMmDGjxuM8+OCD0b179zo9DKdFixax9957x5577vmJj7vXXntF8+bNP/Z5zbIs+SFY25r5zjvvjDPPPDPuuOOO+PrXv54066ZsyXN06623xrJlyza7jUdE7LLLLrHbbrsl2c432trnunXr1vH888/HvHnz8rchQ4bk9/Jt7/OK6no7T21r3mci/vNb3nXr1sWaNWu2U9LaNvdc9+/fP1asWBHXX3/9Jperq9/0b6lt3SYaNWoUe++9d3Tq1Gmz5/fUlbr8DN9vv/1i8eLFUVlZWWP67Nmz8+uUQtu2baN3795x3XXXfexrq0+fPtGuXbu46qqrNjk/5XaypRk3Z+OelNWrV0fEf/ZqHXHEEfG3v/2txnvhqFGjtvlQwo97j/j2t78dBQUFcfXVV9dabtKkSbFy5co47bTTtvhnffGLX4wFCxbE3nvvXevWqNG2f43f1tfi5z//+ejVq1fccsstcfPNN0dFRUV8/vOf3+Zc2+rD31Xrc7uOiNhxDuT/DDvyyCPjvPPOi7Vr19a4eEGvXr3i+9//fnzwwQc7bNmqrq6OysrKWL9+fbzzzjsxffr0GDduXPTt2zfOOOOMuP/++2PatGlx6qmnxj777BNZlsXvf//7+OMf/xi33HJLnee5+OKLY8OGDXHllVdGRMQee+wRV199dQwfPjyOO+64ePHFF+Odd96JQw89NFq2bBkvvvhijBo1Kg4//PDYa6+9IiLimGOOiS9+8Yvxve99LyZMmBAbNmyI8847L3r37p0/5GrIkCFx3XXXxfDhw2Pw4MHxxBNPxOTJk5MforfRmDFjYtWqVfG1r30t9txzz3jvvffiF7/4RaxduzZ69+4dERH/5//8n9hjjz3yxyA/9thj8fOf/zyGDh26XTJuTeY777wzzjjjjLj22mvjy1/+cv4LR7NmzfJ/l6Yu/fCHP4zjjz8+ysvL4/33349p06bFI488EtOnT8+PWbVqVVRWVsa6devirbfeirvvvjvGjx8f3//+9/OvyxtvvDHmzZsX3/rWt+Lzn/98fPDBB/HrX/86XnjhhZg4cWKd5/4kn/RcN2rUKLp06VJjmY1/B++j0+vTlmwz9eGT3mc2evfdd/PbzvPPPx/XXnttHHnkkdG6dev6ir5Zhx12WIwaNSpGjBgRb731VnzrW9+KsrKyeOWVV2LSpEnxla98JS688ML6jrlDbhN1+Rnep0+f2H///ePUU0+NK664IsrKyuK5556LkSNHxpAhQ6JVq1apViOu
v/76OPzww6N79+4xZsyYOOigg6JRo0Yxe/bs+Mc//hHdunWLFi1axK9+9as4+eST48QTT4wLLrgg9t577/jXv/4V//f//t948803Y9q0afWacaP3338/KisrI8uyWLRoUYwaNSratWsXPXv2jLVr18Ztt90Wl112Wa33vLPOOiuuuuqq+Nvf/hYHH3zwVuX8pPeIq666KkaOHBlNmzaNAQMGRGFhYdx3333xwx/+MEaMGPGpfuH1k5/8JPr27Rvl5eVx8sknR6NGjeK5556L559/Pi6//PKtyr8lPs1rcdCgQTF48OCIiBrnyG8vn/RdtXHjxvW6XTuMcDvYeHjARy8lvmjRoiwiss9//vP5aTvaYYQRkUVEVlBQkO26667ZMccck9188835XdevvvpqNnjw4GyfffbJmjVrln3uc5/LDj300OyWW26p8zyPPPJI1rhx4+zRRx+tNa9Pnz7ZUUcdlf3lL3/JevTokRUXF2dNmzbNOnfunF100UW1Luv51ltvZSeddFLWsmXLrKSkJDvzzDOzd999t9bPO+SQQ7ImTZpke+21V3bDDTfU6fp83K7vv/zlL9m3v/3trLy8PGvSpElWUlKSHXfccTXW/Re/+EV24IEHZs2bN89at26dHXLIIdn111+/zYcVpMzcq1ev/Db14dvAgQOT5P3e976X7bnnnlmTJk2yXXfdNTv66KPzh4p+NE+TJk2y3XbbLevbt292991313icZ555Jjv99NOzjh07ZkVFRVnbtm2zI444Ivvd736XJHeWbftz/VE70qXfN9qa9UhtS95nXnvttRrbb+PGjbMOHTpkgwcP3i5/V2ZzPu653uiuu+7KjjjiiKxVq1ZZixYtsoMOOii77LLL6v3S7xttyTaxvT8X6/ozfMmSJdl//dd/ZXvuuWfWrFmzbL/99ssuu+yy7IMPPki9Ktnbb7+dnX/++VnHjh2zwsLCrGXLltmXvvSl7Gc/+1mNvxs2e/bs7KSTTsp23XXXrKioKNt7772zs88+O3v55Zd3iIx77rlnjdfgrrvumn3ta1/LH9r729/+NmvUqFFWWVm5yZ/RtWvXbOjQoVuVb0veIzZs2JDdd9992Ve/+tWsRYsWWdOmTbNu3brVOjx6Sw4jzLIsmz59etazZ8+sWbNmWevWrbMvfelL2S9/+cutyv9hdfX+vGrVqqy4uDgrLi7OVq1atc25Po0t+a66UX1t17ksq++zxgAAAD57nLMFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFAACQgLIFwGfWI488ErlcLt57771tfqzHH388unbtGoWFhfHNb35zmx8PgM8+ZQuABu3MM8+MXC4XuVwuCgsLo1OnTjFy5MhYuXLlVj1eRUVFDBs2rNb04cOHxxe+8IVYuHBhTJkyZdtCA7BTKKjvAACwrY477ri45ZZbYu3atfHoo4/GWWedFStXroxTTjmlzn7Gq6++GkOGDIkOHTps9WOsWbMmmjRpUmeZANix2bMFQINXVFQUpaWlUV5eHv3794/vfve7ce+999Ya9+6778Zpp50WHTp0iObNm0fXrl3jzjvvzM8/88wzY+bMmXHttdfm95a9/vrrkcvl4t13343vfe97kcvl8nu2Zs6cGV/60peiqKgodtttt7j44otj3bp1+cerqKiI888/P4YPHx7t2rWL3r175w9tfOCBB+KQQw6JZs2axVFHHRVLly6NP/3pT7H//vtH69at47TTTotVq1alfuoASEjZAuAzp1mzZrF27dpa0z/44IPo1q1b3H///TF//vw4++yzY8CAAfHUU09FRMS1114bPXr0iMGDB8eSJUtiyZIlUV5eHkuWLInWrVvHhAkTYsmSJXHKKafEW2+9FV/72tfi0EMPjb/97W9xww03xOTJk+Pyyy+v8TNvvfXWKCgoiMcffzxuvPHG/PQxY8bEddddF7NmzYpFixZFv379YsKECXHHHXfEH/7w
h5gxY0ZMnDgx7RMFQFIOIwTgM+Xpp5+OO+64I44++uha83bfffcYOXJk/v7QoUNj+vTp8Zvf/CYOO+ywKC4ujiZNmkTz5s2jtLQ0P660tDRyuVwUFxfnp19//fVRXl4e1113XeRyudhvv/3i7bffjosuuih+8pOfRKNG//l95t577x1XXXVV/rEqKysjIuLyyy+Pww8/PCIiBg0aFKNHj45XX301OnXqFBER3/nOd+Lhhx+Oiy66qI6fIQC2F3u2AGjw7r///mjZsmU0bdo0evToEUccccQm9wqtX78+rrjiijjooIOibdu20bJly3jwwQfjzTff/NQ/8+9//3v06NEjcrlcftrhhx8eK1asiMWLF+ende/efZPLH3TQQfl/l5SURPPmzfNFa+O0pUuXfupcAOw47NkCoME78sgj44YbbojCwsIoKyuLwsLCiIh48cUXa4y7+uqrY/z48TFhwoTo2rVrtGjRIoYNGxZr1qz51D8zy7IaRWvjtIioMb1FixabXH5jxo3jP3x/47QNGzZ86lwA7DiULQAavBYtWsTee+/9ieMeffTR+MY3vhGnn356RERs2LAhXn755dh///3zY5o0aRLr16//xMc64IAD4n//939rlK5Zs2ZFq1atYvfdd9/KNQHgs8RhhADsNPbee++YMWNGzJo1K/7+97/HOeeckz+HaqO99tornnrqqXj99dfjX//612b3Lp177rmxaNGiGDp0aPzjH/+I++67L37605/G8OHD8+drAbBz82kAwE7jxz/+cXzxi1+MY489NioqKqK0tDS++c1v1hgzcuTIaNy4cRxwwAGx6667bvZ8rt133z3++Mc/xtNPPx0HH3xwDBkyJAYNGhQ/+tGPtsOaANAQ5LKNB5gDAABQZ+zZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASEDZAgAASOD/Affm7/5LRuGNAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#The above table shows the number of unique values in each of the variables. \n", + "#We will use Platform, Genre and Rating for the variable example below.\n", + "#Let's chart these variables.\n", + "\n", + "#check the number of occurrences for each variable \n", + "import seaborn as sb\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.rcParams['figure.figsize'] = 10, 8\n", + "\n", + "sb.countplot(x='Platform',data=df, palette='hls')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Now plot for Genre\n", + "sb.countplot(x='Genre',data=df, palette='hls')\t\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Now for Rating\n", + "sb.countplot(x='Rating',data=df, palette='hls')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1-One-hot Coding\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name_ Tales of Xillia 2Name_.hack//Infection Part 1Name_.hack//Mutation Part 2Name_.hack//Outbreak Part 3Name_007 RacingName_007: Quantum of SolaceName_007: The World is not EnoughName_1 vs. 100Name_10 Minute SolutionName_100 All-Time Favorites...Developer_syn SophiaDeveloper_zSlideRating_AORating_ERating_E10+Rating_ECRating_K-ARating_MRating_RPRating_T
00000000000...0001000000
20000000000...0001000000
30000000000...0001000000
60000000000...0001000000
70000000000...0001000000
80000000000...0001000000
110000000000...0001000000
130000000000...0001000000
140000000000...0001000000
150000000000...0001000000
160000000000...0000000100
170000000000...0000000100
190000000000...0001000000
230000000000...0000000100
240000000000...0000000100
260000000000...0001000000
280000000000...0001000000
290000000000...0000000100
320000000000...0000000100
340000000000...0000000100
\n", + "

20 rows × 8138 columns

\n", + "
" + ], + "text/plain": [ + " Name_ Tales of Xillia 2 Name_.hack//Infection Part 1 \\\n", + "0 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "6 0 0 \n", + "7 0 0 \n", + "8 0 0 \n", + "11 0 0 \n", + "13 0 0 \n", + "14 0 0 \n", + "15 0 0 \n", + "16 0 0 \n", + "17 0 0 \n", + "19 0 0 \n", + "23 0 0 \n", + "24 0 0 \n", + "26 0 0 \n", + "28 0 0 \n", + "29 0 0 \n", + "32 0 0 \n", + "34 0 0 \n", + "\n", + " Name_.hack//Mutation Part 2 Name_.hack//Outbreak Part 3 Name_007 Racing \\\n", + "0 0 0 0 \n", + "2 0 0 0 \n", + "3 0 0 0 \n", + "6 0 0 0 \n", + "7 0 0 0 \n", + "8 0 0 0 \n", + "11 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + "15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "19 0 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "26 0 0 0 \n", + "28 0 0 0 \n", + "29 0 0 0 \n", + "32 0 0 0 \n", + "34 0 0 0 \n", + "\n", + " Name_007: Quantum of Solace Name_007: The World is not Enough \\\n", + "0 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "6 0 0 \n", + "7 0 0 \n", + "8 0 0 \n", + "11 0 0 \n", + "13 0 0 \n", + "14 0 0 \n", + "15 0 0 \n", + "16 0 0 \n", + "17 0 0 \n", + "19 0 0 \n", + "23 0 0 \n", + "24 0 0 \n", + "26 0 0 \n", + "28 0 0 \n", + "29 0 0 \n", + "32 0 0 \n", + "34 0 0 \n", + "\n", + " Name_1 vs. 100 Name_10 Minute Solution Name_100 All-Time Favorites ... \\\n", + "0 0 0 0 ... \n", + "2 0 0 0 ... \n", + "3 0 0 0 ... \n", + "6 0 0 0 ... \n", + "7 0 0 0 ... \n", + "8 0 0 0 ... \n", + "11 0 0 0 ... \n", + "13 0 0 0 ... \n", + "14 0 0 0 ... \n", + "15 0 0 0 ... \n", + "16 0 0 0 ... \n", + "17 0 0 0 ... \n", + "19 0 0 0 ... \n", + "23 0 0 0 ... \n", + "24 0 0 0 ... \n", + "26 0 0 0 ... \n", + "28 0 0 0 ... \n", + "29 0 0 0 ... \n", + "32 0 0 0 ... \n", + "34 0 0 0 ... 
\n", + "\n", + " Developer_syn Sophia Developer_zSlide Rating_AO Rating_E Rating_E10+ \\\n", + "0 0 0 0 1 0 \n", + "2 0 0 0 1 0 \n", + "3 0 0 0 1 0 \n", + "6 0 0 0 1 0 \n", + "7 0 0 0 1 0 \n", + "8 0 0 0 1 0 \n", + "11 0 0 0 1 0 \n", + "13 0 0 0 1 0 \n", + "14 0 0 0 1 0 \n", + "15 0 0 0 1 0 \n", + "16 0 0 0 0 0 \n", + "17 0 0 0 0 0 \n", + "19 0 0 0 1 0 \n", + "23 0 0 0 0 0 \n", + "24 0 0 0 0 0 \n", + "26 0 0 0 1 0 \n", + "28 0 0 0 1 0 \n", + "29 0 0 0 0 0 \n", + "32 0 0 0 0 0 \n", + "34 0 0 0 0 0 \n", + "\n", + " Rating_EC Rating_K-A Rating_M Rating_RP Rating_T \n", + "0 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "6 0 0 0 0 0 \n", + "7 0 0 0 0 0 \n", + "8 0 0 0 0 0 \n", + "11 0 0 0 0 0 \n", + "13 0 0 0 0 0 \n", + "14 0 0 0 0 0 \n", + "15 0 0 0 0 0 \n", + "16 0 0 1 0 0 \n", + "17 0 0 1 0 0 \n", + "19 0 0 0 0 0 \n", + "23 0 0 1 0 0 \n", + "24 0 0 1 0 0 \n", + "26 0 0 0 0 0 \n", + "28 0 0 0 0 0 \n", + "29 0 0 1 0 0 \n", + "32 0 0 1 0 0 \n", + "34 0 0 1 0 0 \n", + "\n", + "[20 rows x 8138 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#The first approach is to use the commonly used one-hot coding method. 
\n", + "#This will take a categorical variable and create a set of new variables corresponding \n", + "#with each distinct value in the variable, and then populate it with a binary value to \n", + "#indicate the original value.\n", + "\n", + "#apply one-hot-coding to all the categorical variables\n", + "# and create a new dataframe to store the results\n", + "\n", + "df2 = pd.get_dummies(df)\n", + "df2.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(9873, 8138)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see we now have 8138 variables in the pandas dataframe!\n", + "That is a lot and may not be workable for you. You may need to look at some feature reduction methods to reduce the number of variables." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2-Find and Replace - Manual Coding\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "E 3949\n", + "T 2939\n", + "M 1560\n", + "E10+ 1411\n", + "EC 8\n", + "K-A 3\n", + "RP 2\n", + "AO 1\n", + "Name: Rating, dtype: int64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#In this example we will simply replace the values with defined values.\n", + "#Let’s have a look at the values in the Rating variable and their frequencies.\n", + "df['Rating'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformYear_of_ReleaseGenrePublisherNA_SalesEU_SalesJP_SalesOther_SalesGlobal_SalesCritic_ScoreCritic_CountUser_ScoreUser_CountDeveloperRating
0Wii SportsWii2006.0SportsNintendo41.3628.963.778.4582.5376.051.08322.0NintendoE
1Super Mario Bros.NES1985.0PlatformNintendo29.083.586.810.7740.24NaNNaNNaNNaNNaNNaN
2Mario Kart WiiWii2008.0RacingNintendo15.6812.763.793.2935.5282.073.08.3709.0NintendoE
3Wii Sports ResortWii2009.0SportsNintendo15.6110.933.282.9532.7780.073.08192.0NintendoE
4Pokemon Red/Pokemon BlueGB1996.0Role-PlayingNintendo11.278.8910.221.0031.37NaNNaNNaNNaNNaNNaN
5TetrisGB1989.0PuzzleNintendo23.202.264.220.5830.26NaNNaNNaNNaNNaNNaN
6New Super Mario Bros.DS2006.0PlatformNintendo11.289.146.502.8829.8089.065.08.5431.0NintendoE
7Wii PlayWii2006.0MiscNintendo13.969.182.932.8428.9258.041.06.6129.0NintendoE
8New Super Mario Bros. WiiWii2009.0PlatformNintendo14.446.944.702.2428.3287.080.08.4594.0NintendoE
9Duck HuntNES1984.0ShooterNintendo26.930.630.280.4728.31NaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Name Platform Year_of_Release Genre \\\n", + "0 Wii Sports Wii 2006.0 Sports \n", + "1 Super Mario Bros. NES 1985.0 Platform \n", + "2 Mario Kart Wii Wii 2008.0 Racing \n", + "3 Wii Sports Resort Wii 2009.0 Sports \n", + "4 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing \n", + "5 Tetris GB 1989.0 Puzzle \n", + "6 New Super Mario Bros. DS 2006.0 Platform \n", + "7 Wii Play Wii 2006.0 Misc \n", + "8 New Super Mario Bros. Wii Wii 2009.0 Platform \n", + "9 Duck Hunt NES 1984.0 Shooter \n", + "\n", + " Publisher NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales \\\n", + "0 Nintendo 41.36 28.96 3.77 8.45 82.53 \n", + "1 Nintendo 29.08 3.58 6.81 0.77 40.24 \n", + "2 Nintendo 15.68 12.76 3.79 3.29 35.52 \n", + "3 Nintendo 15.61 10.93 3.28 2.95 32.77 \n", + "4 Nintendo 11.27 8.89 10.22 1.00 31.37 \n", + "5 Nintendo 23.20 2.26 4.22 0.58 30.26 \n", + "6 Nintendo 11.28 9.14 6.50 2.88 29.80 \n", + "7 Nintendo 13.96 9.18 2.93 2.84 28.92 \n", + "8 Nintendo 14.44 6.94 4.70 2.24 28.32 \n", + "9 Nintendo 26.93 0.63 0.28 0.47 28.31 \n", + "\n", + " Critic_Score Critic_Count User_Score User_Count Developer Rating \n", + "0 76.0 51.0 8 322.0 Nintendo E \n", + "1 NaN NaN NaN NaN NaN NaN \n", + "2 82.0 73.0 8.3 709.0 Nintendo E \n", + "3 80.0 73.0 8 192.0 Nintendo E \n", + "4 NaN NaN NaN NaN NaN NaN \n", + "5 NaN NaN NaN NaN NaN NaN \n", + "6 89.0 65.0 8.5 431.0 Nintendo E \n", + "7 58.0 41.0 6.6 129.0 Nintendo E \n", + "8 87.0 80.0 8.4 594.0 Nintendo E \n", + "9 NaN NaN NaN NaN NaN NaN " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "videoReview.head(10)\n", + "#df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformGenrePublisherUser_ScoreDeveloperRating
0Wii SportsWiiSportsNintendo8Nintendo1
2Mario Kart WiiWiiRacingNintendo8.3Nintendo1
3Wii Sports ResortWiiSportsNintendo8Nintendo1
6New Super Mario Bros.DSPlatformNintendo8.5Nintendo1
7Wii PlayWiiMiscNintendo6.6Nintendo1
8New Super Mario Bros. WiiWiiPlatformNintendo8.4Nintendo1
11Mario Kart DSDSRacingNintendo8.6Nintendo1
13Wii FitWiiSportsNintendo7.7Nintendo1
14Kinect Adventures!X360MiscMicrosoft Game Studios6.3Good Science Studio1
15Wii Fit PlusWiiSportsNintendo7.4Nintendo1
\n", + "
" + ], + "text/plain": [ + " Name Platform Genre Publisher \\\n", + "0 Wii Sports Wii Sports Nintendo \n", + "2 Mario Kart Wii Wii Racing Nintendo \n", + "3 Wii Sports Resort Wii Sports Nintendo \n", + "6 New Super Mario Bros. DS Platform Nintendo \n", + "7 Wii Play Wii Misc Nintendo \n", + "8 New Super Mario Bros. Wii Wii Platform Nintendo \n", + "11 Mario Kart DS DS Racing Nintendo \n", + "13 Wii Fit Wii Sports Nintendo \n", + "14 Kinect Adventures! X360 Misc Microsoft Game Studios \n", + "15 Wii Fit Plus Wii Sports Nintendo \n", + "\n", + " User_Score Developer Rating \n", + "0 8 Nintendo 1 \n", + "2 8.3 Nintendo 1 \n", + "3 8 Nintendo 1 \n", + "6 8.5 Nintendo 1 \n", + "7 6.6 Nintendo 1 \n", + "8 8.4 Nintendo 1 \n", + "11 8.6 Nintendo 1 \n", + "13 7.7 Nintendo 1 \n", + "14 6.3 Good Science Studio 1 \n", + "15 7.4 Nintendo 1 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#The last 4 values listed have very small number of occurrences.\n", + "#We will group these into having one value/category\n", + "\n", + "find_replace = {\"Rating\" : {\"E\": 1, \"T\": 2, \"M\": 3, \"E10+\": 4, \"EC\": 5, \"K-A\": 5, \"RP\": 5, \"AO\": 5}}\n", + "df.replace(find_replace, inplace=True)\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Now plot the newly generated rating values and their frequencies.\n", + "sb.countplot(x='Rating',data=df, palette='hls')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "#Question: How does this plot compare to the plot of the original data?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3 – Label encoding\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Name object\n", + "Platform object\n", + "Genre object\n", + "Publisher object\n", + "User_Score object\n", + "Developer object\n", + "Rating int64\n", + "dtype: object" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#With this technique, each distinct value in a categorical variable is converted to a number.\n", + "#In this scenario you don’t get to pick the numeric value assigned to the value. It is system determined.\n", + "\n", + "#let's check the data types again\n", + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Name object\n", + "Platform object\n", + "Genre object\n", + "Publisher object\n", + "User_Score object\n", + "Developer object\n", + "Rating int64\n", + "Platform_Category category\n", + "dtype: object" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Our categorical variables are of ‘object’ data type. 
\n", + "#We need to convert them to a category data type.\n", + "#In this example ‘Platform’ has a large-ish number of values and we want a quick way of \n", + "# converting them; we can illustrate this by creating a new variable.\n", + "df[\"Platform_Category\"] = df[\"Platform\"].astype('category')\n", + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformGenrePublisherUser_ScoreDeveloperRatingPlatform_Category
0Wii SportsWiiSportsNintendo8Nintendo112
2Mario Kart WiiWiiRacingNintendo8.3Nintendo112
3Wii Sports ResortWiiSportsNintendo8Nintendo112
6New Super Mario Bros.DSPlatformNintendo8.5Nintendo12
7Wii PlayWiiMiscNintendo6.6Nintendo112
8New Super Mario Bros. WiiWiiPlatformNintendo8.4Nintendo112
11Mario Kart DSDSRacingNintendo8.6Nintendo12
13Wii FitWiiSportsNintendo7.7Nintendo112
14Kinect Adventures!X360MiscMicrosoft Game Studios6.3Good Science Studio114
15Wii Fit PlusWiiSportsNintendo7.4Nintendo112
16Grand Theft Auto VPS3ActionTake-Two Interactive8.2Rockstar North38
17Grand Theft Auto: San AndreasPS2ActionTake-Two Interactive9Rockstar North37
19Brain Age: Train Your Brain in Minutes a DayDSMiscNintendo7.9Nintendo12
23Grand Theft Auto VX360ActionTake-Two Interactive8.1Rockstar North314
24Grand Theft Auto: Vice CityPS2ActionTake-Two Interactive8.7Rockstar North37
26Brain Age 2: More Training in Minutes a DayDSPuzzleNintendo7.1Nintendo12
28Gran Turismo 3: A-SpecPS2RacingSony Computer Entertainment8.4Polyphony Digital17
29Call of Duty: Modern Warfare 3X360ShooterActivision3.4Infinity Ward, Sledgehammer Games314
32Call of Duty: Black OpsX360ShooterActivision6.3Treyarch314
34Call of Duty: Black Ops IIPS3ShooterActivision5.3Treyarch38
\n", + "
" + ], + "text/plain": [ + " Name Platform Genre \\\n", + "0 Wii Sports Wii Sports \n", + "2 Mario Kart Wii Wii Racing \n", + "3 Wii Sports Resort Wii Sports \n", + "6 New Super Mario Bros. DS Platform \n", + "7 Wii Play Wii Misc \n", + "8 New Super Mario Bros. Wii Wii Platform \n", + "11 Mario Kart DS DS Racing \n", + "13 Wii Fit Wii Sports \n", + "14 Kinect Adventures! X360 Misc \n", + "15 Wii Fit Plus Wii Sports \n", + "16 Grand Theft Auto V PS3 Action \n", + "17 Grand Theft Auto: San Andreas PS2 Action \n", + "19 Brain Age: Train Your Brain in Minutes a Day DS Misc \n", + "23 Grand Theft Auto V X360 Action \n", + "24 Grand Theft Auto: Vice City PS2 Action \n", + "26 Brain Age 2: More Training in Minutes a Day DS Puzzle \n", + "28 Gran Turismo 3: A-Spec PS2 Racing \n", + "29 Call of Duty: Modern Warfare 3 X360 Shooter \n", + "32 Call of Duty: Black Ops X360 Shooter \n", + "34 Call of Duty: Black Ops II PS3 Shooter \n", + "\n", + " Publisher User_Score Developer \\\n", + "0 Nintendo 8 Nintendo \n", + "2 Nintendo 8.3 Nintendo \n", + "3 Nintendo 8 Nintendo \n", + "6 Nintendo 8.5 Nintendo \n", + "7 Nintendo 6.6 Nintendo \n", + "8 Nintendo 8.4 Nintendo \n", + "11 Nintendo 8.6 Nintendo \n", + "13 Nintendo 7.7 Nintendo \n", + "14 Microsoft Game Studios 6.3 Good Science Studio \n", + "15 Nintendo 7.4 Nintendo \n", + "16 Take-Two Interactive 8.2 Rockstar North \n", + "17 Take-Two Interactive 9 Rockstar North \n", + "19 Nintendo 7.9 Nintendo \n", + "23 Take-Two Interactive 8.1 Rockstar North \n", + "24 Take-Two Interactive 8.7 Rockstar North \n", + "26 Nintendo 7.1 Nintendo \n", + "28 Sony Computer Entertainment 8.4 Polyphony Digital \n", + "29 Activision 3.4 Infinity Ward, Sledgehammer Games \n", + "32 Activision 6.3 Treyarch \n", + "34 Activision 5.3 Treyarch \n", + "\n", + " Rating Platform_Category \n", + "0 1 12 \n", + "2 1 12 \n", + "3 1 12 \n", + "6 1 2 \n", + "7 1 12 \n", + "8 1 12 \n", + "11 1 2 \n", + "13 1 12 \n", + "14 1 14 \n", + "15 1 12 \n", + "16 3 8 \n", 
+ "17 3 7 \n", + "19 1 2 \n", + "23 3 14 \n", + "24 3 7 \n", + "26 1 2 \n", + "28 1 7 \n", + "29 3 14 \n", + "32 3 14 \n", + "34 3 8 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Now convert this new variable to numeric.\n", + "df[\"Platform_Category\"] = df[\"Platform_Category\"].cat.codes\n", + "df.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Platform\n", + "3DS 226\n", + "DC 14\n", + "DS 1255\n", + "GBA 517\n", + "GC 469\n", + "PC 760\n", + "PS 201\n", + "PS2 1478\n", + "PS3 947\n", + "PS4 252\n", + "PSP 543\n", + "PSV 149\n", + "Wii 997\n", + "WiiU 105\n", + "X360 1041\n", + "XB 733\n", + "XOne 186\n", + "Name: Platform, dtype: int64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#The number assigned to the Platform_Category variable is based on the alphabetical \n", + "#ordering of the values in the Platform variable. \n", + "df.groupby(\"Platform\")[\"Platform\"].count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4-Using SciKit-Learn transform\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GenreGenre_Code
0Sports10
2Racing6
3Sports10
6Platform4
7Misc3
8Platform4
11Racing6
13Sports10
14Misc3
15Sports10
\n", + "
" + ], + "text/plain": [ + " Genre Genre_Code\n", + "0 Sports 10\n", + "2 Racing 6\n", + "3 Sports 10\n", + "6 Platform 4\n", + "7 Misc 3\n", + "8 Platform 4\n", + "11 Racing 6\n", + "13 Sports 10\n", + "14 Misc 3\n", + "15 Sports 10" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#SciKit-Learn has a number of functions to help with data encodings. \n", + "#The one we will look at in this example is the ‘fit_transform’ function.\n", + "\n", + "#This will perform a similar task to what we have seen in a previous example\n", + "#Let's use the fit_transform function to encode the Genre variable\n", + "\n", + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "le_make = LabelEncoder()\n", + "df[\"Genre_Code\"] = le_make.fit_transform(df[\"Genre\"])\n", + "df[[\"Genre\", \"Genre_Code\"]].head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Genre_Code\n", + "0 2177\n", + "1 435\n", + "2 435\n", + "3 875\n", + "4 566\n", + "5 336\n", + "6 863\n", + "7 770\n", + "8 1017\n", + "9 564\n", + "10 1500\n", + "11 335\n", + "Name: Genre_Code, dtype: int64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#And we can see this comparison when we look at the frequency counts.\n", + "df.groupby(\"Genre_Code\")[\"Genre_Code\"].count()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformGenrePublisherUser_ScoreDeveloperRatingPlatform_CategoryGenre_Code
0Wii SportsWiiSportsNintendo8Nintendo11210
2Mario Kart WiiWiiRacingNintendo8.3Nintendo1126
3Wii Sports ResortWiiSportsNintendo8Nintendo11210
6New Super Mario Bros.DSPlatformNintendo8.5Nintendo124
7Wii PlayWiiMiscNintendo6.6Nintendo1123
8New Super Mario Bros. WiiWiiPlatformNintendo8.4Nintendo1124
11Mario Kart DSDSRacingNintendo8.6Nintendo126
13Wii FitWiiSportsNintendo7.7Nintendo11210
14Kinect Adventures!X360MiscMicrosoft Game Studios6.3Good Science Studio1143
15Wii Fit PlusWiiSportsNintendo7.4Nintendo11210
\n", + "
" + ], + "text/plain": [ + " Name Platform Genre Publisher \\\n", + "0 Wii Sports Wii Sports Nintendo \n", + "2 Mario Kart Wii Wii Racing Nintendo \n", + "3 Wii Sports Resort Wii Sports Nintendo \n", + "6 New Super Mario Bros. DS Platform Nintendo \n", + "7 Wii Play Wii Misc Nintendo \n", + "8 New Super Mario Bros. Wii Wii Platform Nintendo \n", + "11 Mario Kart DS DS Racing Nintendo \n", + "13 Wii Fit Wii Sports Nintendo \n", + "14 Kinect Adventures! X360 Misc Microsoft Game Studios \n", + "15 Wii Fit Plus Wii Sports Nintendo \n", + "\n", + " User_Score Developer Rating Platform_Category Genre_Code \n", + "0 8 Nintendo 1 12 10 \n", + "2 8.3 Nintendo 1 12 6 \n", + "3 8 Nintendo 1 12 10 \n", + "6 8.5 Nintendo 1 2 4 \n", + "7 6.6 Nintendo 1 12 3 \n", + "8 8.4 Nintendo 1 12 4 \n", + "11 8.6 Nintendo 1 2 6 \n", + "13 7.7 Nintendo 1 12 10 \n", + "14 6.3 Good Science Studio 1 14 3 \n", + "15 7.4 Nintendo 1 12 10 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamePlatformPublisherUser_ScoreDeveloperRatingPlatform_CategoryGenre_Code
0Wii SportsWiiNintendo8Nintendo11210
2Mario Kart WiiWiiNintendo8.3Nintendo1126
3Wii Sports ResortWiiNintendo8Nintendo11210
6New Super Mario Bros.DSNintendo8.5Nintendo124
7Wii PlayWiiNintendo6.6Nintendo1123
8New Super Mario Bros. WiiWiiNintendo8.4Nintendo1124
11Mario Kart DSDSNintendo8.6Nintendo126
13Wii FitWiiNintendo7.7Nintendo11210
14Kinect Adventures!X360Microsoft Game Studios6.3Good Science Studio1143
15Wii Fit PlusWiiNintendo7.4Nintendo11210
\n", + "
" + ], + "text/plain": [ + " Name Platform Publisher User_Score \\\n", + "0 Wii Sports Wii Nintendo 8 \n", + "2 Mario Kart Wii Wii Nintendo 8.3 \n", + "3 Wii Sports Resort Wii Nintendo 8 \n", + "6 New Super Mario Bros. DS Nintendo 8.5 \n", + "7 Wii Play Wii Nintendo 6.6 \n", + "8 New Super Mario Bros. Wii Wii Nintendo 8.4 \n", + "11 Mario Kart DS DS Nintendo 8.6 \n", + "13 Wii Fit Wii Nintendo 7.7 \n", + "14 Kinect Adventures! X360 Microsoft Game Studios 6.3 \n", + "15 Wii Fit Plus Wii Nintendo 7.4 \n", + "\n", + " Developer Rating Platform_Category Genre_Code \n", + "0 Nintendo 1 12 10 \n", + "2 Nintendo 1 12 6 \n", + "3 Nintendo 1 12 10 \n", + "6 Nintendo 1 2 4 \n", + "7 Nintendo 1 12 3 \n", + "8 Nintendo 1 12 4 \n", + "11 Nintendo 1 2 6 \n", + "13 Nintendo 1 12 10 \n", + "14 Good Science Studio 1 14 3 \n", + "15 Nintendo 1 12 10 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#And now we can drop the Genre variable from the dataframe as it is no longer needed. \n", + "#BUT you will need to have recorded the mapping between the original Genre values and \n", + "#the numeric values for future reference.\n", + "df = df.drop('Genre', axis=1)\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}