{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### TU257 - Lab4 - Demo - Naive Bayes\n", "#### Introduction to simple Classification\n", "The examples in this notebook illustrate some of the simple steps needed for classification.\n", "\n", "It is important to remember all the things we have covered in the previous weeks, as all of those\n", "apply to every Classification problem.\n", "But firstly, we will start with some simple examples.\n", "\n", "Work through the first example, examining every step/cell. Add additional annotations and descriptions where you can.\n", "\n", "For the second example, there are very few comments/annotations. The exercise for you is to add these." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# load the iris dataset\n", "from sklearn.datasets import load_iris\n", "iris = load_iris()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'data': array([[5.1, 3.5, 1.4, 0.2],\n", " [4.9, 3. , 1.4, 0.2],\n", " [4.7, 3.2, 1.3, 0.2],\n", " [4.6, 3.1, 1.5, 0.2],\n", " [5. , 3.6, 1.4, 0.2],\n", " [5.4, 3.9, 1.7, 0.4],\n", " [4.6, 3.4, 1.4, 0.3],\n", " [5. , 3.4, 1.5, 0.2],\n", " [4.4, 2.9, 1.4, 0.2],\n", " [4.9, 3.1, 1.5, 0.1],\n", " [5.4, 3.7, 1.5, 0.2],\n", " [4.8, 3.4, 1.6, 0.2],\n", " [4.8, 3. , 1.4, 0.1],\n", " [4.3, 3. , 1.1, 0.1],\n", " [5.8, 4. , 1.2, 0.2],\n", " [5.7, 4.4, 1.5, 0.4],\n", " [5.4, 3.9, 1.3, 0.4],\n", " [5.1, 3.5, 1.4, 0.3],\n", " [5.7, 3.8, 1.7, 0.3],\n", " [5.1, 3.8, 1.5, 0.3],\n", " [5.4, 3.4, 1.7, 0.2],\n", " [5.1, 3.7, 1.5, 0.4],\n", " [4.6, 3.6, 1. , 0.2],\n", " [5.1, 3.3, 1.7, 0.5],\n", " [4.8, 3.4, 1.9, 0.2],\n", " [5. , 3. , 1.6, 0.2],\n", " [5. 
, 3.4, 1.6, 0.4],\n", " [5.2, 3.5, 1.5, 0.2],\n", " [5.2, 3.4, 1.4, 0.2],\n", " [4.7, 3.2, 1.6, 0.2],\n", " [4.8, 3.1, 1.6, 0.2],\n", " [5.4, 3.4, 1.5, 0.4],\n", " [5.2, 4.1, 1.5, 0.1],\n", " [5.5, 4.2, 1.4, 0.2],\n", " [4.9, 3.1, 1.5, 0.2],\n", " [5. , 3.2, 1.2, 0.2],\n", " [5.5, 3.5, 1.3, 0.2],\n", " [4.9, 3.6, 1.4, 0.1],\n", " [4.4, 3. , 1.3, 0.2],\n", " [5.1, 3.4, 1.5, 0.2],\n", " [5. , 3.5, 1.3, 0.3],\n", " [4.5, 2.3, 1.3, 0.3],\n", " [4.4, 3.2, 1.3, 0.2],\n", " [5. , 3.5, 1.6, 0.6],\n", " [5.1, 3.8, 1.9, 0.4],\n", " [4.8, 3. , 1.4, 0.3],\n", " [5.1, 3.8, 1.6, 0.2],\n", " [4.6, 3.2, 1.4, 0.2],\n", " [5.3, 3.7, 1.5, 0.2],\n", " [5. , 3.3, 1.4, 0.2],\n", " [7. , 3.2, 4.7, 1.4],\n", " [6.4, 3.2, 4.5, 1.5],\n", " [6.9, 3.1, 4.9, 1.5],\n", " [5.5, 2.3, 4. , 1.3],\n", " [6.5, 2.8, 4.6, 1.5],\n", " [5.7, 2.8, 4.5, 1.3],\n", " [6.3, 3.3, 4.7, 1.6],\n", " [4.9, 2.4, 3.3, 1. ],\n", " [6.6, 2.9, 4.6, 1.3],\n", " [5.2, 2.7, 3.9, 1.4],\n", " [5. , 2. , 3.5, 1. ],\n", " [5.9, 3. , 4.2, 1.5],\n", " [6. , 2.2, 4. , 1. ],\n", " [6.1, 2.9, 4.7, 1.4],\n", " [5.6, 2.9, 3.6, 1.3],\n", " [6.7, 3.1, 4.4, 1.4],\n", " [5.6, 3. , 4.5, 1.5],\n", " [5.8, 2.7, 4.1, 1. ],\n", " [6.2, 2.2, 4.5, 1.5],\n", " [5.6, 2.5, 3.9, 1.1],\n", " [5.9, 3.2, 4.8, 1.8],\n", " [6.1, 2.8, 4. , 1.3],\n", " [6.3, 2.5, 4.9, 1.5],\n", " [6.1, 2.8, 4.7, 1.2],\n", " [6.4, 2.9, 4.3, 1.3],\n", " [6.6, 3. , 4.4, 1.4],\n", " [6.8, 2.8, 4.8, 1.4],\n", " [6.7, 3. , 5. , 1.7],\n", " [6. , 2.9, 4.5, 1.5],\n", " [5.7, 2.6, 3.5, 1. ],\n", " [5.5, 2.4, 3.8, 1.1],\n", " [5.5, 2.4, 3.7, 1. ],\n", " [5.8, 2.7, 3.9, 1.2],\n", " [6. , 2.7, 5.1, 1.6],\n", " [5.4, 3. , 4.5, 1.5],\n", " [6. , 3.4, 4.5, 1.6],\n", " [6.7, 3.1, 4.7, 1.5],\n", " [6.3, 2.3, 4.4, 1.3],\n", " [5.6, 3. , 4.1, 1.3],\n", " [5.5, 2.5, 4. , 1.3],\n", " [5.5, 2.6, 4.4, 1.2],\n", " [6.1, 3. , 4.6, 1.4],\n", " [5.8, 2.6, 4. , 1.2],\n", " [5. , 2.3, 3.3, 1. ],\n", " [5.6, 2.7, 4.2, 1.3],\n", " [5.7, 3. 
, 4.2, 1.2],\n", " [5.7, 2.9, 4.2, 1.3],\n", " [6.2, 2.9, 4.3, 1.3],\n", " [5.1, 2.5, 3. , 1.1],\n", " [5.7, 2.8, 4.1, 1.3],\n", " [6.3, 3.3, 6. , 2.5],\n", " [5.8, 2.7, 5.1, 1.9],\n", " [7.1, 3. , 5.9, 2.1],\n", " [6.3, 2.9, 5.6, 1.8],\n", " [6.5, 3. , 5.8, 2.2],\n", " [7.6, 3. , 6.6, 2.1],\n", " [4.9, 2.5, 4.5, 1.7],\n", " [7.3, 2.9, 6.3, 1.8],\n", " [6.7, 2.5, 5.8, 1.8],\n", " [7.2, 3.6, 6.1, 2.5],\n", " [6.5, 3.2, 5.1, 2. ],\n", " [6.4, 2.7, 5.3, 1.9],\n", " [6.8, 3. , 5.5, 2.1],\n", " [5.7, 2.5, 5. , 2. ],\n", " [5.8, 2.8, 5.1, 2.4],\n", " [6.4, 3.2, 5.3, 2.3],\n", " [6.5, 3. , 5.5, 1.8],\n", " [7.7, 3.8, 6.7, 2.2],\n", " [7.7, 2.6, 6.9, 2.3],\n", " [6. , 2.2, 5. , 1.5],\n", " [6.9, 3.2, 5.7, 2.3],\n", " [5.6, 2.8, 4.9, 2. ],\n", " [7.7, 2.8, 6.7, 2. ],\n", " [6.3, 2.7, 4.9, 1.8],\n", " [6.7, 3.3, 5.7, 2.1],\n", " [7.2, 3.2, 6. , 1.8],\n", " [6.2, 2.8, 4.8, 1.8],\n", " [6.1, 3. , 4.9, 1.8],\n", " [6.4, 2.8, 5.6, 2.1],\n", " [7.2, 3. , 5.8, 1.6],\n", " [7.4, 2.8, 6.1, 1.9],\n", " [7.9, 3.8, 6.4, 2. ],\n", " [6.4, 2.8, 5.6, 2.2],\n", " [6.3, 2.8, 5.1, 1.5],\n", " [6.1, 2.6, 5.6, 1.4],\n", " [7.7, 3. , 6.1, 2.3],\n", " [6.3, 3.4, 5.6, 2.4],\n", " [6.4, 3.1, 5.5, 1.8],\n", " [6. , 3. , 4.8, 1.8],\n", " [6.9, 3.1, 5.4, 2.1],\n", " [6.7, 3.1, 5.6, 2.4],\n", " [6.9, 3.1, 5.1, 2.3],\n", " [5.8, 2.7, 5.1, 1.9],\n", " [6.8, 3.2, 5.9, 2.3],\n", " [6.7, 3.3, 5.7, 2.5],\n", " [6.7, 3. , 5.2, 2.3],\n", " [6.3, 2.5, 5. , 1.9],\n", " [6.5, 3. , 5.2, 2. ],\n", " [6.2, 3.4, 5.4, 2.3],\n", " [5.9, 3. 
, 5.1, 1.8]]),\n", " 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),\n", " 'frame': None,\n", " 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayoutlooktemphumiditywindplay
0D1SunnyHotHighWeakNo
1D2SunnyHotHighStrongNo
2D3OvercastHotHighWeakYes
3D4RainMildHighWeakYes
4D5RainCoolNormalWeakYes
5D6RainCoolNormalStrongNo
6D7OvercastCoolNormalStrongYes
7D8SunnyMildHighWeakNo
8D9SunnyCoolNormalWeakYes
9D10RainMildNormalWeakYes
10D11SunnyMildNormalStrongYes
11D12OvercastMildHighStrongYes
12D13OvercastHotNormalWeakYes
13D14RainMildHighStrongNo
\n", "" ], "text/plain": [ " day outlook temp humidity wind play\n", "0 D1 Sunny Hot High Weak No\n", "1 D2 Sunny Hot High Strong No\n", "2 D3 Overcast Hot High Weak Yes\n", "3 D4 Rain Mild High Weak Yes\n", "4 D5 Rain Cool Normal Weak Yes\n", "5 D6 Rain Cool Normal Strong No\n", "6 D7 Overcast Cool Normal Strong Yes\n", "7 D8 Sunny Mild High Weak No\n", "8 D9 Sunny Cool Normal Weak Yes\n", "9 D10 Rain Mild Normal Weak Yes\n", "10 D11 Sunny Mild Normal Strong Yes\n", "11 D12 Overcast Mild High Strong Yes\n", "12 D13 Overcast Hot Normal Weak Yes\n", "13 D14 Rain Mild High Strong No" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "#Load in the dataset\n", "#NOTE: this is a hard-coded absolute local path - change it to the location of your copy of play_tennis.csv\n", "df = pd.read_csv('/Users/brendan.tierney/Dropbox/4-Datasets/play_tennis.csv')\n", "df.head(20)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(14, 6)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Number of rows and features (columns)\n", "df.shape" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayoutlooktemphumiditywindplay
0021010
1621000
2701011
3812011
4910111
51010100
61100101
71222010
81320111
9112111
10222101
11302001
12401111
13512000
\n", "
" ], "text/plain": [ " day outlook temp humidity wind play\n", "0 0 2 1 0 1 0\n", "1 6 2 1 0 0 0\n", "2 7 0 1 0 1 1\n", "3 8 1 2 0 1 1\n", "4 9 1 0 1 1 1\n", "5 10 1 0 1 0 0\n", "6 11 0 0 1 0 1\n", "7 12 2 2 0 1 0\n", "8 13 2 0 1 1 1\n", "9 1 1 2 1 1 1\n", "10 2 2 2 1 0 1\n", "11 3 0 2 0 0 1\n", "12 4 0 1 1 1 1\n", "13 5 1 2 0 0 0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Transform the text (categorical) values into numeric codes\n", "#Use Encoding to do this quickly\n", "from sklearn import preprocessing \n", "\n", "#Setup the Label Encoder\n", "le=preprocessing.LabelEncoder()\n", "#Loop through every column (this includes the 'day' identifier and the 'play' target)\n", "for col in df.columns:\n", " #transform the column, overwriting the text values in df with integer codes (e.g. play: No=0, Yes=1)\n", " #the same encoder is re-fitted on each pass, so afterwards it only holds the mapping of the last column\n", " df[col]=le.fit_transform(df[col])\n", "\n", "#Display the updated dataframe\n", "df" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayoutlooktemphumiditywind
002101
162100
270101
381201
491011
\n", "
" ], "text/plain": [ " day outlook temp humidity wind\n", "0 0 2 1 0 1\n", "1 6 2 1 0 0\n", "2 7 0 1 0 1\n", "3 8 1 2 0 1\n", "4 9 1 0 1 1" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#There are various ways to do the next step\n", "#Here we separate the descriptive features from the Target feature\n", "#NOTE: 'day' is only a row identifier but it is still kept in X here - consider dropping it as well\n", "X = df.drop('play', axis=1)\n", "\n", "#Create a new Series (Y) to only contain the Target attribute\n", "Y = df['play']\n", "X.head()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# use Stratified sampling to divide the data\n", "from sklearn.model_selection import StratifiedShuffleSplit\n", "\n", "#Setup the sampling and splitting the data\n", "split = StratifiedShuffleSplit(n_splits=10, test_size = 0.2, random_state=18)\n", "\n", "#Create the Train and Test datasets\n", "#NOTE: n_splits=10 produces 10 different splits; each pass of the loop overwrites train_set and test_set, so only the last split is kept\n", "for train_index, test_index in split.split(X, Y):\n", " train_set = df.loc[train_index]\n", " test_set = df.loc[test_index]" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "#Exercise: Modify the above cell to replace X, Y in the split function with the dataframe subsets\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(11, 6)\n", "(3, 6)\n" ] } ], "source": [ "#display the sizes of the Train and Test datasets\n", "print(train_set.shape)\n", "print(test_set.shape)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayoutlooktemphumiditywindplay
10222101
4910111
71222010
3812011
11302001
9112111
13512000
0021010
1621000
61100101
81320111
\n", "
" ], "text/plain": [ " day outlook temp humidity wind play\n", "10 2 2 2 1 0 1\n", "4 9 1 0 1 1 1\n", "7 12 2 2 0 1 0\n", "3 8 1 2 0 1 1\n", "11 3 0 2 0 0 1\n", "9 1 1 2 1 1 1\n", "13 5 1 2 0 0 0\n", "0 0 2 1 0 1 0\n", "1 6 2 1 0 0 0\n", "6 11 0 0 1 0 1\n", "8 13 2 0 1 1 1" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_set" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dayoutlooktemphumiditywindplay
12401111
2701011
51010100
\n", "
" ], "text/plain": [ " day outlook temp humidity wind play\n", "12 4 0 1 1 1 1\n", "2 7 0 1 0 1 1\n", "5 10 1 0 1 0 0" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#This is a very small dataset\n", "test_set" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 7\n", "0 4\n", "Name: play, dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_set['play'].value_counts()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 2\n", "0 1\n", "Name: play, dtype: int64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_set['play'].value_counts()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "#Separate the datasets into X, Y\n", "X_train = train_set.drop('play', axis=1)\n", "X_test = test_set.drop('play', axis=1)\n", "Y_train = train_set['play']\n", "Y_test = test_set['play']" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "#Import Gaussian Naive Bayes model\n", "from sklearn.naive_bayes import GaussianNB\n", "\n", "#Create a Gaussian Classifier\n", "model = GaussianNB()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GaussianNB()" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train (fit) the model using the training sets\n", "model.fit(X_train,Y_train)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "#Apply the model to the Test dataset to create the Predicted values\n", "predicted= model.predict(X_test) " ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 0, 1])" ] }, "execution_count": 
45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicted" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
play01
row_0
001
111
\n", "
" ], "text/plain": [ "play 0 1\n", "row_0 \n", "0 0 1\n", "1 1 1" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Crosstab the columns to give a basic confusion matrix\n", "pd.crosstab(predicted, Y_test)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "#We can also get the prediction probabilities\n", "#These indicate how strong of a prediction the model made for each prediction\n", "#1=a very strong prediction\n", "#0=a very weak prediction\n", "Y_predict_prob = model.predict_proba(X_test)[:,1]" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1. , 0.1098402, 1. ])" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y_predict_prob" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAEbCAYAAAD6TW79AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAavUlEQVR4nO3de5wcZZ3v8c83IQHkImrklgQSlqCguF5CXA6uZo1KQCFxcTEgKLtZokfisqA5sCsCcluVJYhrFCOEgHskXHQxIBfPctkgK5jI7ZAgEEMIkxAjlwQFJMz0b/+oGuxUZrqrSfdUV8/3/XrVK9NVTz/162H49dO/euppRQRmZlYeQ4oOwMzMGuPEbWZWMk7cZmYl48RtZlYyTtxmZiXjxG1mVjJO3B1C0hGSbpO0XtLLkh6VNFvS7i0630GS7pX0R0lNm1Mq6UxJTzerv5znC0mP9XP8sfT4mQ32O6GR50iamJ7n7Y2cxwYnJ+4OIOkC4GpgBXAs8BHgQmASMKdFp/0esB44GDiwif1ekvY5kP4IjJU0vnqnpAOAMenxRk0Azmig/b0kv8ffvIZz2SCzVdEB2JaRdBhwMjA9IuZVHfovSXNJkngrvBWYGxH/1cxOI6IL6Gpmnzm8QJI4pwFLqvZPA24D3tOqE0sSsHVEPA/c3arzWGfxiLv8TgLuzSRtACKiJyJu6n0saYSkyyU9I+lFSXf0McpcKelfJZ0kqUvSc5IWSNopPT4xLY0MBS5KP97PT4+FpJmZ/jYpfUjaSdIlktakZZZVkr7fX/t031hJ10l6XtLvJV0vae9Mm5B0oqTzJP1O0jpJcyRtnfP3uAA4Mk2kvQn1yHT/JiQdKGmhpKckvSDpfkmfqjp+HPBvVXGFpDuqX5+k90laTDKa/5tsqUTS30iqSJpU1e+Y9Hdwbs7XZB3KibvEJA0D/hdwc86nXEdShvgS8EmS//63Z5MgScKaBMwATgE+BpyXHuv9SA9wQfrz2Q2EPRt4H8kbzsHAPwP91sjTxHsrsC9wPHAcMJbkE8UbM82/COwOH
AOcD3wWODFnXD8GdkljA/hL4M3p/qw9gbuA6cBhwI+AyyQdlR7/KcnvBpLfz4HA56ue/zrgcpKy0GTgl9kTRMQ1wFXAPEk7pm8klwGPA1/N+ZqsQ7lUUm5vArYGVtVrKGkycBAwsbe8Iek2YCUwiyTJ9XoFmBoR3Wm7/UjKBp/v/UifDkxXRkSjH+8nAHMi4qqqff9eo/3fAnsA+0TEijSee0jq+Z8F/qWq7cqIOC79+RZJBwF/DXyjXlARsV7SzSSv887035sjYkP6WqvbvjoKTxPqImAUyRvLlRHxO0kr07Z9/X62BU6OiJ9U9bNbH+1OAB4iuV7xAMmb9ISI2Fjv9Vhnc+LuDHlmdUwA1lXXpCPiBUk38KdRZq/be5N2ahmws6RhEfHKFsZ6PzBLUg/wnxHxaI647+1N2mncXZLu6iPun2UeLwPGk98C4JuSTgY+AfxDX40kvYFk1DsFGElSNgJYnfM8AdxUt1HEs5KOB24ANgJnRcQDOc9hHcylknJ7BniZZERaz27Auj72/xbIlhzWZx5vBEQyut9SM0lKNqcDj6TT7abVaL9bGmNW3ri3aSC2hcD2wLnAdsD1/bSbT1JqOp/k4u8BwLwGzvVcA6Pm20he6xDg+3Xa2iDhxF1i6ej3LvJNn3sK2LmP/bsAzzYppJeB4Zl9b6h+EBHrI+IfImJX4M+Be4D/m5Zj+jIQcffG9gLJ6PYk4Pr08SYkbUNS8z8jIr4dEbdFxBIa+3+pkXnvXyMZ0a8FvtnA86yDOXGX3zeB8ZI+kz0gaUha24YkQe4s6f1Vx18HfBT4eZNi6SK5iPjq+UkucvYpIh4kqa8PIZle2Jd7gPdIGlvV70iSem+z4q72XZKR9sX9HN+aJN6Xq+LZATg8025jeqyREf8mJE0EvgD8b5ILoUdJOuK19medwzXukouI6yXNBi5NL8b9BPgDSSL8HMnFx5sj4hZJ/w1cJelUkjLLl0gulJ3fpHD+AzhB0n0kFw//HtixuoGkn6ftHiIZeR5PMo96s5kVqfkkM1tuknQ60ENyY8vTJDcBNVVE3AHcUeP4hnQa3+mSngcqwKnABjZ9rb9O/z0xvQj8fEQ8kjcOSduTlF+uiohr033fA74raVFE/C7/q7JO4xF3B4iIL5LUXMcBPwT+H8nUuFtJRmu9pqbHvglcQ1K3/mBELG9SKF9N+z2HJOHeTzKFrdovSKb0XUtyt+cI4JD0xpvNRMTLwIdIEuGlJNPoVpHMjmlqqaQBR5O8MV0BXEQyHfCKTJs7Sd4QTyT51NDom8wFJG+qJ1Tt+xLJm3J/nwZskJC/uszMrFw84jYzKxknbjOzFpI0L12C4aF+jkvStyQtl/SgpHfX69OJ28ysteaTLG3Qn0NIrk+NI1lm4rv1OnTiNjNroYhYRO17DqYAV0TibmCnfpZAeFUppgNuNXykr6DaZl5ac2fRIVgbGjZiL9VvVdsrT6/InXOGv/nPPksyUu41NyLmNnC6kcCTVY+70n1P9feEUiRuM7MBVenJ3TRN0o0k6i3mxG1mlhWVgTzbamB01eNR1FmwzDVuM7OsSiX/tuUWAp9OZ5f8BbAhIvotk4BH3GZmm4kmjrglXQlMBEZI6iJZsmFYcp64GLgROBRYDrxIsgZ9TU7cZmZZPd312+QUEUfVOR5surRBXU7cZmZZDVycLIITt5lZ1sBenGyYE7eZWVZzLjq2jBO3mVlGMy9OtoITt5lZlkfcZmYl0/NK0RHU5MRtZpblUomZWcm4VGJmVjIecZuZlYxH3GZm5RIVX5w0MysXj7jNzErGNW4zs5LxIlNmZiXjEbeZWcm4xm1mVjJN/CKFVnDiNjPL8ojbzKxcInxx0sysXDziNjMrGc8qMTMrGY+4zcxKxrNKzMxKxqUSM7OScanEzKxknLjNzErGpRIzs5LxxUkzs5JxqcTMrGRcKjEzKxmPuM3MSsaJ28ysZCKKjqAmJ24zs6xuzyoxMyuXNr84OaToA
MzM2k6lkn+rQ9JkSY9IWi7p1D6O7yHpdkn3SXpQ0qH1+nTiNjPLisi/1SBpKDAHOATYDzhK0n6ZZqcBV0fEu4BpwHfqhedSiZlZVvNmlUwAlkfECgBJC4ApwLKqNgHsmP78emBNvU6duM3MshpI3JJmADOqds2NiLnpzyOBJ6uOdQHvzXRxJvAzSV8AtgM+VO+cTtxmZhnRk//LgtMkPbduw/4dBcyPiAskHQj8QNLbI/q/QurEbWaW1bxSyWpgdNXjUem+atOByQAR8QtJ2wAjgHX9deqLk2ZmWVHJv9W2GBgnaayk4SQXHxdm2qwCJgFI2hfYBvhdrU494jYzy6o0587JiOiWNBO4BRgKzIuIpZLOApZExELgi8D3JZ1EcqHyuIja01WcuM3Mspq4VklE3AjcmNl3etXPy4CDGunTidvMLKuBi5NFcI27RA7+yESWPrSIXy/7Of9n1glFh2Nt4LTzZvP+j05j6jGfKzqUztLEOydbwYm7JIYMGcK3LjqXjx12DPv/+V/xyU9OZd99xxUdlhVs6qEf5uLZ5xQdRuepRP6tAANSKpH0VpK7hUamu1YDCyPi4YE4fyeYcMC7+M1vVvL446sAuPrqn3D4YQfz8MOPFRyZFWn8O/dn9VO/LTqMzjPYF5mSdAqwABDwy3QTcGVfC65Y33YfuStPdv3pTtiu1U+x++67FhiRWQfziJvpwNsi4pXqnZJmA0uBr/X1pOrbSDX09QwZsl2r4zQzAyD8DThUgN2BJzL7d0uP9an6NtKtho9s76+jGABrVq9l9KjdX308auRurFmztsCIzDpYm88qGYjE/Y/ArZIe40+LrewB7A3MHIDzd4TFS+5n773HMmbMaFavXsuRR07h2E97ZolZSxRUAsmr5Yk7Im6WtA/J8obVFycXR0R7v621kZ6eHk78x9O48ac/ZOiQIcy//CqWLXu06LCsYLPO+BqL73uQ9eufZ9LUY/j89GM54rCDiw6r/Nq8VKI6d1a2BZdKrC8vrbmz6BCsDQ0bsZe2tI8XTp+WO+dsd9aCLT5fo3znpJlZVptPB3TiNjPLGuw1bjOzsonu9r785sRtZpblEbeZWcm4xm1mVjIecZuZlUs4cZuZlYwvTpqZlYxH3GZmJePEbWZWLu2+FIgTt5lZlkfcZmYl48RtZlYu0e0bcMzMyqW987YTt5lZlm/AMTMrGyduM7OScanEzKxcXCoxMyuZ6HbiNjMrF5dKzMzKpc2/R8GJ28xsM07cZmbl0u4j7iFFB2Bm1m6iO/9Wj6TJkh6RtFzSqf20OVLSMklLJf2wXp8ecZuZZTRrxC1pKDAH+DDQBSyWtDAillW1GQf8E3BQRDwnaed6/XrEbWaWEZX8Wx0TgOURsSIiNgILgCmZNscDcyLiOYCIWFevUyduM7OsUO5N0gxJS6q2GVU9jQSerHrcle6rtg+wj6S7JN0taXK98FwqMTPLaKRUEhFzgblbcLqtgHHARGAUsEjS/hGxvtYTzMysSlTUrK5WA6OrHo9K91XrAu6JiFeAxyU9SpLIF/fXqUslZmYZlR7l3upYDIyTNFbScGAasDDT5jqS0TaSRpCUTlbU6jRX4pZ0kKQ90593ljRf0qXpSczMOkqzLk5GRDcwE7gFeBi4OiKWSjpL0uFps1uAZyQtA24HZkXEM7X6VZ5vM5b0/4GPR8RySZeRDPf/CLwYEZ+s28EW2mr4yPZe8cUK8dKaO4sOwdrQsBF7bXGd48kDJuXOOaMX39q0ukpeeWvco9OkLeBjwNuAF6kznDczK6Mc49lC5U3c3ZK2BfYF1kbEOklDgG1bF5qZWTGaeHGyJfIm7tuAq4E3kRTSISmgr21BTGZmhcpx0bFQeRP3DGAWsBH4Rrpvb+DbrQjKzKxIHTHiTieCfzmz74ZWBGRmVrSIkiZuSUfn6SAi6q5kZWZWJu2+rGutEfe5OZ4fgBO3mXWUSllH3BExdiADMTNrF6UtlZiZDVYdMaskv
fHm74FJwJuBV19VRHywNaGZmRWj3WeV5F1k6lzgbJJ1Zf8C+BWwH3B/a8IyMytOJZR7K0LexH00cHBEzAI2pv9OBca0KC4zs8JEKPdWhLw17jdGxAPpzz2ShkbE3ZL+qlWBmZkVpVPWKlktaY+IWEWysNQhkp4GXmldaGZmxSjtdMCM7wLvAVYBF5KsVyLgjNaEZWZWnEqbX5zMe8v7t6p+vlLSncD2EfHrlkVmZlaQThlxbyIiupodSC1eMN/MBlJH3IAj6TGS29s3ExH7NDUiM7OCdcqI+5zM45EkN+Rc0txwzMyK1+aTSnLXuC/P7pN0PfCvwHnNDsrMrEg9lby3uBRjS9YqWQoc2KxAzMzaRZuv6pq7xr17Ztd2wN+RTA80M+soQWfUuLvYtOwjYCXwmWYHZGZWtEqbF7nzJu7s2ty/j4hnmx2MmVk7qHTCiDsinmh1IGZm7aLdSyW5Lp1K+pmkD2b2TZJ0U2vCMjMrTg/KvRUh75yXdwOLMvsWAQc0Nxwzs+JVGtiKkLfGXQGGAd2Z57b35wkzs9eg3acD5h1x/wr4QmbfTODe5oZjZla8QLm3IuQdcZ8C3CHpCOBRYJ90m9iiuMzMCtPmq7rmG3FHxIMk3zH5I+B54BpgCjC9daGZmRWjgnJvRch9Q35ErAUuAn4JfBy4neSipZlZR+lpYCtC3lve9wM+CxwDvI4k4R8SET9rYWxmZoWoqL1rJTVH3JKOTb/t5iHgA8CZJEu6Pgvc3+rgzMyKEA1sRag34r4ceAb4aES8erON2vzdyMxsS5R9OuBXgD8A10n6D0mHSWrvhWrNzLZQRfm3eiRNlvSIpOWSTq3R7ghJIWl8vT5rJuGIOBfYC5hK8qngR8BqYCcgu9SrmVlHaNYt75KGAnOAQ0hm5h2VXjPMttsBOBG4J098dUfPkbgpIv4a2BP4DrAWWCzp6jwnMTMrkyaOuCcAyyNiRURsBBaQTKXOOhv4OvDHPPE1VPaIiKci4mySUfgUYHgjzzczK4NG1iqRNEPSkqptRlVXI4Enqx53pfteJendwOiI+Gne+F7TV5dFRAA3ppuZWUdpZLZIRMwF5r6W86TXDGcDxzXyvC35zkkzs47UxFveVwOjqx6PSvf12gF4O8mSIgC7AgslHR4RS/rr1InbzCyjidMBFwPjJI0lSdjTgKN7D0bEBmBE72NJdwBfqpW0wYnbzGwzPU0acUdEt6SZwC3AUGBeRCyVdBawJCIWvpZ+nbjNzDKaeQNORGx2PTAiTu+n7cQ8fTpxm5lltPudk07cZmYZRa1BkpcTt5lZRrt/kYITt5lZhkslZmYlU9QXJOTlxG1mluFSiZlZybhUYmZWMp5VYmZWMpU2T91O3GZmGb44aWZWMq5xm5mVjGeVmJmVjGvcZmYl095p24nbzGwzrnGbmZVMT5uPuZ24zcwyPOI2MysZX5w0MyuZ9k7bTtxmZptxqcTMrGR8cdLMrGRc47amOO282Sy665e88Q07cd2/X1x0ONYm/HfRGu2dtmFI0QFYPlMP/TAXzz6n6DCszfjvojUqRO6tCE7cJTH+nfvz+h13KDoMazP+u2iNSgNbEVwqMTPLiDYvlhQ64pb0tzWOzZC0RNKSS664ciDDMrNBrofIvRWh6BH3V4HL+joQEXOBuQCvPL2ivd/+zKyjDPp53JIe7O8QsEurz29m1qhKtPdYcSBG3LsABwPPZfYL+O8BOH9HmHXG11h834OsX/88k6Yew+enH8sRhx1cdFhWMP9dtEZ7p+2BSdw3ANtHxP3ZA5LuGIDzd4Tzv3pq0SFYG/LfRWsM+htwImJ6jWNHt/r8ZmaNavdZJUVfnDQzazvdTtxmZuXS7iNu3zlpZpbRzDsnJU2W9Iik5ZI2uygh6WRJyyQ9KOlWSXvW69OJ28wsIyJyb7VIGgrMAQ4B9gOOkrRfptl9wPiIeAdwLfCNevE5cZuZZTRxkakJwPKIWBERG4EFwJTqBhFxe0S8m
D68GxhVr1MnbjOzjEZuea9eniPdZlR1NRJ4supxV7qvP9OBm+rF54uTZmYZjczjrl6eY0tIOgYYD3ygXlsnbjOzjHq16wasBkZXPR6V7tuEpA8BXwY+EBEv1+vUpRIzs4wmzipZDIyTNFbScGAasLC6gaR3Ad8DDo+IdXni84jbzCyjWfO4I6Jb0kzgFmAoMC8ilko6C1gSEQuB84HtgWskAayKiMNr9evEbWaW0cy1SiLiRuDGzL7Tq37+UKN9OnGbmWX0RHuvyO3EbWaW0e63vDtxm5ll+IsUzMxKpr3TthO3mdlmBv0XKZiZlY0Tt5lZyXhWiZlZyXhWiZlZyTRxrZKWcOI2M8twjdvMrGQ84jYzK5meXN8mWRwnbjOzDN85aWZWMp5VYmZWMh5xm5mVjEfcZmYl4xG3mVnJ+JZ3M7OScanEzKxkwiNuM7Ny8S3vZmYl41vezcxKxiNuM7OS6am4xm1mViqeVWJmVjKucZuZlYxr3GZmJeMRt5lZyfjipJlZybhUYmZWMi6VmJmVjJd1NTMrGc/jNjMrGY+4zcxKptLmy7oOKToAM7N2ExG5t3okTZb0iKTlkk7t4/jWkq5Kj98jaUy9Pp24zcwympW4JQ0F5gCHAPsBR0naL9NsOvBcROwNXAh8vV58TtxmZhnRwFbHBGB5RKyIiI3AAmBKps0U4PL052uBSZJUq9NS1LiHjdir5osYTCTNiIi5Rcdh7cV/F83VvXF17pwjaQYwo2rX3Kr/FiOBJ6uOdQHvzXTxapuI6Ja0AXgT8HR/5/SIu3xm1G9ig5D/LgoSEXMjYnzV1vI3UCduM7PWWQ2Mrno8Kt3XZxtJWwGvB56p1akTt5lZ6ywGxkkaK2k4MA1YmGmzEPhM+vMngNuizlXPUtS4bROuY1pf/HfRhtKa9UzgFmAoMC8ilko6C1gSEQuBS4EfSFoOPEuS3GtSuy+mYmZmm3KpxMysZJy4zcxKxom7ROrdOmuDj6R5ktZJeqjoWGzgOHGXRM5bZ23wmQ9MLjoIG1hO3OWR59ZZG2QiYhHJTAQbRJy4y6OvW2dHFhSLmRXIidvMrGScuMsjz62zZjYIOHGXR55bZ81sEHDiLomI6AZ6b519GLg6IpYWG5UVTdKVwC+At0jqkjS96Jis9XzLu5lZyXjEbWZWMk7cZmYl48RtZlYyTtxmZiXjxG1mVjJO3FYKkt4nKaoeXyzp2wMcw39KOnMgz2nWFyduawpJd0h6WdIfJG2QdJ+kI1p1voj4XETMbCC201oVi9lAc+K2Zjo7IrYH3gRcCVwlaZ/qBpKGFRKZWQdx4ramS+/y/A7Jl6PuL6lb0rGSVpAuQSppD0nXSlor6SlJcyXt0NuHpHHpSPn3kh4AxlefQ9J8SZdUPX6zpEslrZL0vKR7Jb0lLaf8JfCV9NPAI1XPOV7SQ1WfED5SdUyS/im9G/FZSRcCas1vzKwxTtzWdOlaKicArwBvIEnghwLvAnaRtA1wG7AMGEvyxRCjgIvS528F3AAsBXYGPgF8rsb5hpCs27ITcED673HA79Nyyp2knwYi4i3pc44HTgE+lcb4ZeDHkvZOuz0GOIlkzfNdgaeB92/J78WsWZy4rZm+LGk9yVrhU4AjgOXpsVMiYkNEvAh8jGS5hdMj4qWIeA74CvCp9Jt+3guMAWalxx8DLqhx3vHp9ncR8duIqETEgxGxpsZzTgTOiogH0vY3AreTLN4F8GngexHxq/SLK/4FWNvoL8SsFbYqOgDrKOdGxDnVOyRNBCps+iUQY4E90iRfLUhGt6OAdWmS7/V4jfOOSdtvaCDWscAcSd+q2rcVyZsOaQwrXw0soiLpiQb6N2sZJ24bCBGbrmb2BPBoRLytr8aSVgM7S3pdVfIeU6P/lWn7HSPi+T6OV/rY9wRwRkRc00+fq6vPKUnAnjViMBswLpVYEW4Ahkv6Z0k7pBcCR0r6eHr8bpLE+nVJ20r6M+DkGv0tAe4FLpG0s6Qhk
t4haff0+Fpg78xzLgTOlPTO9PzbpnPF35oe/wEwQ9K705kwp5J8GjArnBO3Dbh0FP1BkouSvwY2ALcC70yPdwOHA+8A1gE/BubW6K8CHAa8BNwPrAfmAdunTS4ExktaL2lp+pzvA98ALgOeA1aR1Nl7pyteAfwbcD3wW5KLpIu27JWbNYfX4zYzKxmPuM3MSsaJ28ysZJy4zcxKxonbzKxknLjNzErGidvMrGScuM3MSsaJ28ysZP4HMIKCCecUJQ4AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "from sklearn.metrics import confusion_matrix\n", "import matplotlib.pyplot as plt\n", "\n", "# build the confusion matrix from the actual and predicted values\n", "# (rows = actual classes, columns = predicted classes)\n", "cm = confusion_matrix(Y_test, predicted)\n", "\n", "# annot=True writes the data value in each cell of the matrix\n", "sns.heatmap(cm, annot=True)\n", "plt.title('Confusion Matrix', fontsize = 15) \n", "plt.xlabel('Predicted', fontsize = 13) \n", "plt.ylabel('Actuals', fontsize = 13) \n", "\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }