File size: 47,184 Bytes
567bb77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>preg</th>\n",
       "      <th>plas</th>\n",
       "      <th>pres</th>\n",
       "      <th>skin</th>\n",
       "      <th>test</th>\n",
       "      <th>mass</th>\n",
       "      <th>pedi</th>\n",
       "      <th>age</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   preg  plas  pres  skin  test  mass   pedi  age  class\n",
       "0     6   148    72    35     0  33.6  0.627   50      1\n",
       "1     1    85    66    29     0  26.6  0.351   31      0\n",
       "2     8   183    64     0     0  23.3  0.672   32      1\n",
       "3     1    89    66    23    94  28.1  0.167   21      0\n",
       "4     0   137    40    35   168  43.1  2.288   33      1"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Import the data set\n",
    "\n",
    "import pandas as pd\n",
    "columns = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']\n",
    "data = pd.read_csv('/Users/brendan.tierney/Dropbox/4-Datasets/pima-indians-diabetes.csv', names=columns)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(768, 9)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    500\n",
       "1    268\n",
       "Name: class, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['class'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQN0lEQVR4nO3df6zddX3H8edrVECFUaB3FVuwbNQ5zAKaihB/xEk2BefKH8pQp5WwNFkg0TB/dGoUjS64ZAPNnFkzDFX8AUORTpgOUaJG+VEUUESlY7C2Aq3QVpT5A3nvj/MpnNZ7e2/be++hnz4fycn5fD+fz/d836e9ffV7P+d7zklVIUnqy++MugBJ0vQz3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4SxNIMpbkB0mePOpaxpPkgFbf2Khr0ROP4a6RSvLaJGuS/CzJvUn+M8kLZ+G4leSYSaatAC6uqv9r+1yX5K9nuraJ7Hj8qvol8DEGdUrbMdw1MknOBS4E/h6YDxwF/AuwdIRlAYOzYmAZcMk0Puac6XqsIZ8ClrV6pccY7hqJJIcA7wPOrqrPVdXPq+rXVfUfVfXWNueAJBcm+XG7XbgtxJK8Mck3dnjMx87Gk1yc5CNJrkryUJIbkvxBG/ta2+XW9hvDX45T4vOBLVW1vu3zAeBFwD+3ff659X8oybokP01yc5IXDdVzXpLLk1yS5KfAG5McneRrraYvtxovGdrnxCTfTLIlya1JXrKz47f6NgMn7v7fhnpkuGtUTgIOBK7YyZx3Mgit44HjgBOAd+3CMc4A3gscCqwFPgBQVS9u48dV1UFVdek4+/4x8MNtG1X1TuDrwDltn3Pa0E2tvsMYnEX/e5IDhx5nKXA5MBf4ZJtzI3A4cB7w+m0TkywArgLe3x7vLcBnk4zt5PgAdzD485EeY7hrVA4HflJVj+xkzuuA91XVxqraxCCoX7+T+Tu6oqpubMf4JIMQnqq5wEOTTaqqS6rqgap6pKr+ETgA+MOhKd+qqs9X1aPAGPA84N1V9auq+gawemjuXwFXV9XVVfVoVV0DrAFOnaSMh1q90mMMd43KA8C8Sdahnw7cM7R9T+ubqvuG2g8DB+3CvpuBgyeblOQtSe5IsjXJFuAQYN7QlHVD7acDD1bVwxOMPwN4dVuS2dIe74XAEZOUcTCwZbJatW8x3DUq3wJ+CZy2kzk/ZhB42xzV+gB+Djxl20CSp01zfbcBz9yhb7uPUG3r628DTgcOraq5wFYgE+xzL3BYkqcM9R051F4HfKKq5g7dnlpV5493/CF/BNw6heekfYjhrpGoqq3Au4GPJDktyVOSPCnJKUn+oU37NPCudr35vDZ/24uPtwLPTnJ8W+M+bxdLuB/4/Z2M3wjMbevgE+1zMPAIsAmYk+TdwO9O9IBVdQ+DZZbzkuyf5CTglUNTLgFemeRlSfZLcmCSlyRZOFHNrb7DgOt38ly0DzLcNTJtjfpcBi+SbmJw5noO8Pk25f0MwvA24LvAt1sfVfUjBlfbfBm4E9juypkpOA9Y1ZY/Th+ntl8BFzNYB9/mQ8CrkmxO8mHgS8AXgR8xWDL6Bdsvs4zndQxeTH6gPZdLGfwGQ1WtY/AC7Dt4/M/jrTz+73TH4wO8FljVrnmXHhO/rEMaX3vn59eB52x7I9MMHONS4AdV9Z7d2PcABr/BvLiqNk57cdqrGe7SLEryPOBB4H+AP2PwW8pJVfWdUdal/szEO+YkTexpwOcYXAq6Hvgbg10zwTN3SeqQL6hKUocMd0nq0BNizX3evHm1aNGiUZchSXuVm2+++SdVNe7n+T8hwn3RokWsWbNm1GVI0l4lyT0TjbksI0kdMtwlqUOGuyR1yHCXpA4Z7pLUoSmFe5K7k3w3yS1J1rS+w5Jck+TOdn9o60+SDydZm+S2JM+dyScgSfptu3Lm/idVdXxVLWnbK4Brq2oxcG3bBjgFWNxuy4GPTlexkqSp
2ZNlmaXAqtZexePfqLMU+HgNXM/gCw8m+5owSdI0muqbmAr4ryQF/GtVrQTmV9W9bfw+YH5rL2D7LyxY3/ruHeojyXIGZ/YcddRRu1f9LFu04qpRl9CVu89/xahLkLo11XB/YVVtSPJ7wDVJfjA8WFXVgn/K2n8QKwGWLFniR1NK0jSa0rJMVW1o9xuBK4ATgPu3Lbe0+23fBLOB7b/0d2HrkyTNkknDPclTkxy8rc3g22O+B6wGlrVpy4ArW3s18IZ21cyJwNah5RtJ0iyYyrLMfOCKJNvmf6qqvpjkJuCyJGcx+HLgbV8yfDVwKrAWeBg4c9qrliTt1KThXlV3AceN0/8AcPI4/QWcPS3VSZJ2i+9QlaQOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHVoyuGeZL8k30nyhbZ9dJIbkqxNcmmS/Vv/AW17bRtfNEO1S5ImsCtn7m8C7hja/iBwQVUdA2wGzmr9ZwGbW/8FbZ4kaRZNKdyTLAReAfxb2w7wUuDyNmUVcFprL23btPGT23xJ0iyZ6pn7hcDbgEfb9uHAlqp6pG2vBxa09gJgHUAb39rmS5JmyaThnuTPgY1VdfN0HjjJ8iRrkqzZtGnTdD60JO3zpnLm/gLgL5LcDXyGwXLMh4C5Sea0OQuBDa29ATgSoI0fAjyw44NW1cqqWlJVS8bGxvboSUiStjdpuFfV31XVwqpaBJwBfKWqXgd8FXhVm7YMuLK1V7dt2vhXqqqmtWpJ0k7tyXXubwfOTbKWwZr6Ra3/IuDw1n8usGLPSpQk7ao5k095XFVdB1zX2ncBJ4wz5xfAq6ehNknSbvIdqpLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOTRruSQ5McmOSW5PcnuS9rf/oJDckWZvk0iT7t/4D2vbaNr5ohp+DJGkHUzlz/yXw0qo6DjgeeHmSE4EPAhdU1THAZuCsNv8sYHPrv6DNkyTNoknDvQZ+1jaf1G4FvBS4vPWvAk5r7aVtmzZ+cpJMV8GSpMlNac09yX5JbgE2AtcA/w1sqapH2pT1wILWXgCsA2jjW4HDp7FmSdIkphTuVfWbqjoeWAicADxrTw+cZHmSNUnWbNq0aU8fTpI0ZJeulqmqLcBXgZOAuUnmtKGFwIbW3gAcCdDGDwEeGOexVlbVkqpaMjY2tnvVS5LGNZWrZcaSzG3tJwN/CtzBIORf1aYtA65s7dVtmzb+laqqaaxZkjSJOZNP4QhgVZL9GPxncFlVfSHJ94HPJHk/8B3gojb/IuATSdYCDwJnzEDdkqSdmDTcq+o24Dnj9N/FYP19x/5fAK+eluokSbvFd6hKUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOjSVd6hKeoJbtOKqUZfQlbvPf8WoS9hjnrlLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6NGm4JzkyyVeTfD/J7Une1PoPS3JNkjvb/aGtP0k+nGRtktuSPHemn4QkaXtTOXN/BPjbqjoWOBE4O8mxwArg2qpaDFzbtgFOARa323Lgo9NetSRppyYN96q6
t6q+3doPAXcAC4ClwKo2bRVwWmsvBT5eA9cDc5McMd2FS5Imtktr7kkWAc8BbgDmV9W9beg+YH5rLwDWDe22vvVJkmbJlMM9yUHAZ4E3V9VPh8eqqoDalQMnWZ5kTZI1mzZt2pVdJUmTmFK4J3kSg2D/ZFV9rnXfv225pd1vbP0bgCOHdl/Y+rZTVSuraklVLRkbG9vd+iVJ45jK1TIBLgLuqKp/GhpaDSxr7WXAlUP9b2hXzZwIbB1avpEkzYI5U5jzAuD1wHeT3NL63gGcD1yW5CzgHuD0NnY1cCqwFngYOHM6C5YkTW7ScK+qbwCZYPjkceYXcPYe1iVJ2gO+Q1WSOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUoUnDPcnHkmxM8r2hvsOSXJPkznZ/aOtPkg8nWZvktiTPncniJUnjm8qZ+8XAy3foWwFcW1WLgWvbNsApwOJ2Ww58dHrKlCTtiknDvaq+Bjy4Q/dSYFVrrwJOG+r/eA1cD8xNcsQ01SpJmqLdXXOfX1X3tvZ9wPzWXgCsG5q3vvVJkmbRHr+gWlUF1K7ul2R5kjVJ1mzatGlPy5AkDdndcL9/23JLu9/Y+jcARw7NW9j6fktVrayqJVW1ZGxsbDfLkCSNZ3fDfTWwrLWXAVcO9b+hXTVzIrB1aPlGkjRL5kw2IcmngZcA85KsB94DnA9cluQs4B7g9Db9auBUYC3wMHDmDNQsSZrEpOFeVa+ZYOjkceYWcPaeFiVJ2jO+Q1WSOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjo0I+Ge5OVJfphkbZIVM3EMSdLEpj3ck+wHfAQ4BTgWeE2SY6f7OJKkic3EmfsJwNqququqfgV8Blg6A8eRJE1gzgw85gJg3dD2euD5O05KshxY3jZ/luSHM1DLvmoe8JNRFzGZfHDUFWgE/NmcXs+YaGAmwn1KqmolsHJUx+9ZkjVVtWTUdUg78mdz9szEsswG4Mih7YWtT5I0S2Yi3G8CFic5Osn+wBnA6hk4jiRpAtO+LFNVjyQ5B/gSsB/wsaq6fbqPo51yuUtPVP5szpJU1ahrkCRNM9+hKkkdMtwlqUOGuyR1aGTXuWt6JHkWg3cAL2hdG4DVVXXH6KqSNGqeue/Fkrydwcc7BLix3QJ82g9s0xNZkjNHXUPvvFpmL5bkR8Czq+rXO/TvD9xeVYtHU5m0c0n+t6qOGnUdPXNZZu/2KPB04J4d+o9oY9LIJLltoiFg/mzWsi8y3PdubwauTXInj39Y21HAMcA5oypKauYDLwM279Af4JuzX86+xXDfi1XVF5M8k8HHLA+/oHpTVf1mdJVJAHwBOKiqbtlxIMl1s17NPsY1d0nqkFfLSFKHDHdJ6pDhLkkdMtwlqUOGuyR16P8BtIgOjfRADRgAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#print bar chart\n",
    "data['class'].value_counts().plot(kind='bar', title='Count (target)');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Down Sampling - Majority Class - Using Random Sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Class = 0  500\n",
      "Class = 1  268\n"
     ]
    }
   ],
   "source": [
    "count_class_0, count_class_1 = data['class'].value_counts()\n",
    "\n",
    "# Divide by class\n",
    "df_class_0 = data[data['class'] == 0] #majority class\n",
    "df_class_1 = data[data['class'] == 1] #minority class\n",
    "\n",
    "print('Class = 0 ', df_class_0.shape[0])\n",
    "print('Class = 1 ', df_class_1.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(268, 9)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sample the majority class (y=0) to have the same number of records as the minority class (y=1)\n",
    "df_class_0_under = df_class_0.sample(count_class_1)\n",
    "\n",
    "df_class_0_under.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random under-sampling:\n",
      "0    268\n",
      "1    268\n",
      "Name: class, dtype: int64\n",
      "Num records =  536\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPyklEQVR4nO3df6zddX3H8edrVHEKs2DvaimtZVq3QRarqYjxR1hMRFhMMdkY6LAal5qFJhp/bPgj2hlZ2DJ/RiWpkVAFEaag3WQ6bDRI/AGFQQUq0ihdWwu98lvZ0MJ7f5xv4fRyb+/ve+inz0dyc8/5fL/f831fuDx7+r3nXFJVSJLa8nuDHkCSNPOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLY0gylOSnSX5/0LOMJsnh3XxDg55FTz3GXQOV5I1JNif5dZLdSf4zySvn4LyV5AXj7HYucFFV/W93zPeS/O1szzaWkeevqkeAC+nNKe3HuGtgkrwL+CTwT8BCYCnwOWDVAMcCes+KgdXAxTP4mPNm6rH6fBlY3c0rPc64ayCSPBv4CHBOVV1RVb+pqt9V1b9X1Xu7fQ5P8skkv+w+PrkvYknekuTaEY/5+LPxJBcl+WySbyZ5KMmPkzy/23ZNd8jN3d8Y/nqUEV8G3F9VO7tjzgNeBXymO+Yz3fqnkuxI8mCSG5K8qm+edUm+muTiJA8Cb0lyXJJrupm+0814cd8xJyX5QZL7k9yc5OQDnb+b7z7gpKn/21CLjLsG5eXAM4ArD7DPB+hFawXwIuBE4IOTOMeZwD8CRwHbgPMAqurV3fYXVdURVXXZKMf+GXD7vjtV9QHg+8Da7pi13abru/mOpvcs+t+SPKPvcVYBXwXmA5d0+1wHPAdYB5y9b8cki4FvAh/tHu89wNeSDB3g/ABb6f3zkR5n3DUozwF+VVV7D7DPm4CPVNWeqhqmF+qzD7D/SFdW1XXdOS6hF+GJmg88NN5OVXVxVd1TVXur6mPA4cAf9+3yw6r6elU9BgwBLwU+VFW/raprgY19+/4NcFVVXVVVj1XV1cBm4LRxxniom1d6nHHXoNwDLBjnOvQxwPa++9u7tYm6q+/2w8ARkzj2PuDI8XZK8p4kW5M8kOR+4NnAgr5ddvTdPga4t6oeHmP784C/6i7J3N893iuBReOMcSRw/3iz6tBi3DUoPwQeAU4/wD6/pBe8fZZ2awC/AZ65b0OS587wfFuAF45Y2+9XqHbX1/8eOAM4qqrmAw8AGeOY3cDRSZ7Zt7ak7/YO4EtVNb/v41lVdf5o5+/zp8DNE/iadAgx7hqIqnoA+BDw2SSnJ3lmkqclOTXJv3S7XQp8sHu9+YJu/30/fLwZOCHJiu4a97pJjnA38EcH2H4dML+7Dj7WMUcCe4FhYF6SDwF/MNYDVtV2epdZ1iV5epKXA6/v2+Vi4PVJTklyWJJnJDk5ybFjzdzNdzTwowN8LToEGXcNTHeN+l30fkg6TO+Z61rg690uH6UXwy3AT4AbuzWq6mf0Xm3zHeAOYL9XzkzAOmBDd/njjFFm+y1wEb3r4Pt8CvjLJPcl+TTwbeBbwM/oXTL6P/a/zDKaN9H7YfI93ddyGb2/wVBVO+j9APb9PPHP47088d/pyPMDvBHY0L3mXXpc/J91SKPr3vn5feDF+97INAvnuAz4aVV9eArHHk7vbzCvrqo9Mz6cDmrGXZpDSV4K3Av8Angtvb+lvLyq/nuQc6k9s/GOOUljey5wBb2Xgu4E/s6wazb4zF2SGuQPVCWpQcZdkhr0lLjmvmDBglq2bNmgx5Ckg8oNN9zwq6oa9ff5PyXivmzZMjZv3jzoMSTpoJJk+1jbvCwjSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUoKfEm5gOFsvO/eagR2jKnef/xaBHaIbfmzOrhe9Nn7lLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhL
UoOMuyQ1yLhLUoPGjXuSJUm+m+S2JLcmeUe3vi7JriQ3dR+n9R3zviTbktye5JTZ/AIkSU82kV8/sBd4d1XdmORI4IYkV3fbPlFV/9q/c5LjgTOBE4BjgO8keWFVPTqTg0uSxjbuM/eq2l1VN3a3HwK2AosPcMgq4CtV9UhV/QLYBpw4E8NKkiZmUtfckywDXgz8uFtam2RLkguTHNWtLQZ29B22kwP/YSBJmmETjnuSI4CvAe+sqgeBC4DnAyuA3cDHJnPiJGuSbE6yeXh4eDKHSpLGMaG4J3kavbBfUlVXAFTV3VX1aFU9BnyeJy697AKW9B1+bLe2n6paX1Urq2rl0NDQdL4GSdIIE3m1TIAvAFur6uN964v6dnsDcEt3eyNwZpLDkxwHLAeum7mRJUnjmcirZV4BnA38JMlN3dr7gbOSrAAKuBN4O0BV3ZrkcuA2eq+0OcdXykjS3Bo37lV1LZBRNl11gGPOA86bxlySpGnwHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNGjfuSZYk+W6S25LcmuQd3frRSa5Ockf3+ahuPUk+nWRbki1JXjLbX4QkaX8Teea+F3h3VR0PnASck+R44FxgU1UtBzZ19wFOBZZ3H2uAC2Z8aknSAY0b96raXVU3drcfArYCi4FVwIZutw3A6d3tVcAXq+dHwPwki2Z6cEnS2CZ1zT3JMuDFwI+BhVW1u9t0F7Cwu70Y2NF32M5uTZI0RyYc9yRHAF8D3llVD/Zvq6oCajInTrImyeYkm4eHhydzqCRpHBOKe5Kn0Qv7JVV1Rbd8977LLd3nPd36LmBJ3+HHdmv7qar1VbWyqlYODQ1NdX5J0igm8mqZAF8AtlbVx/s2bQRWd7dXA9/oW39z96qZk4AH+i7fSJLmwLwJ7PMK4GzgJ0lu6tbeD5wPXJ7kbcB24Ixu21XAacA24GHgrTM5sCRpfOPGvaquBTLG5teMsn8B50xzLknSNPgOVUlqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAaNG/ckFybZk+SWvrV1SXYluan7OK1v2/uSbEtye5JTZmtwSdLYJvLM/SLgdaOsf6KqVnQfVwEkOR44EzihO+ZzSQ6bqWElSRMzbtyr6hrg3gk+3irgK1X1SFX9AtgGnDiN+SRJUzCda+5rk2zpLtsc1a0tBnb07bOzW5MkzaGpxv0C4PnACmA38LHJPkCSNUk2J9k8PDw8xTEkSaOZUtyr6u6qerSqHgM+zxOXXnYBS/p2PbZbG+0x1lfVyqpaOTQ0NJUxJEljmFLckyzqu/sGYN8raTYCZyY5PMlxwHLguumNKEmarHnj7ZDkUuBkYEGSncCHgZOTrAAKuBN4O0BV3ZrkcuA2YC9wTlU9OiuTS5LGNG7cq+qsUZa/cID9zwPOm85QkqTp8R2qktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgceOe5MIke5Lc0rd2dJKrk9zRfT6qW0+STyfZlmRLkpfM5vCSpNFN5Jn7RcDrRqydC2yqquXApu4+wKnA8u5jDXDBzIwpSZqMceNeVdcA945YXgVs
6G5vAE7vW/9i9fwImJ9k0QzNKkmaoKlec19YVbu723cBC7vbi4Edffvt7NYkSXNo2j9QraoCarLHJVmTZHOSzcPDw9MdQ5LUZ6pxv3vf5Zbu855ufRewpG+/Y7u1J6mq9VW1sqpWDg0NTXEMSdJophr3jcDq7vZq4Bt962/uXjVzEvBA3+UbSdIcmTfeDkkuBU4GFiTZCXwYOB+4PMnbgO3AGd3uVwGnAduAh4G3zsLMkqRxjBv3qjprjE2vGWXfAs6Z7lCSpOnxHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNmjedg5PcCTwEPArsraqVSY4GLgOWAXcCZ1TVfdMbU5I0GTPxzP3Pq2pFVa3s7p8LbKqq5cCm7r4kaQ7NxmWZVcCG7vYG4PRZOIck6QCmG/cC/ivJDUnWdGsLq2p3d/suYOE0zyFJmqRpXXMHXllVu5L8IXB1kp/2b6yqSlKjHdj9YbAGYOnSpdMcQ5LUb1rP3KtqV/d5D3AlcCJwd5JFAN3nPWMcu76qVlbVyqGhoemMIUkaYcpxT/KsJEfuuw28FrgF2Ais7nZbDXxjukNKkiZnOpdlFgJXJtn3OF+uqm8luR64PMnbgO3AGdMfU5I0GVOOe1X9HHjRKOv3AK+ZzlCSpOnxHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNmrW4J3ldktuTbEty7mydR5L0ZLMS9ySHAZ8FTgWOB85KcvxsnEuS9GSz9cz9RGBbVf28qn4LfAVYNUvnkiSNMG+WHncxsKPv/k7gZf07JFkDrOnu/jrJ7bM0y6FoAfCrQQ8xnvzzoCfQAPi9ObOeN9aG2Yr7uKpqPbB+UOdvWZLNVbVy0HNII/m9OXdm67LMLmBJ3/1juzVJ0hyYrbhfDyxPclySpwNnAhtn6VySpBFm5bJMVe1Nshb4NnAYcGFV3Tob59KovNylpyq/N+dIqmrQM0iSZpjvUJWkBhl3SWqQcZekBg3sde6aOUn+hN47gBd3S7uAjVW1dXBTSRokn7kf5JL8A71f7xDguu4jwKX+wjY9VSV566BnaJ2vljnIJfkZcEJV/W7E+tOBW6tq+WAmk8aW5H+qaumg52iZl2UOfo8BxwDbR6wv6rZJA5Fky1ibgIVzOcuhyLgf/N4JbEpyB0/8sralwAuAtYMaSqIX8FOA+0asB/jB3I9zaDHuB7mq+laSF9L7Ncv9P1C9vqoeHdxkEv8BHFFVN43ckOR7cz7NIcZr7pLUIF8tI0kNMu6S1CDjLkkNMu6S1CDjLkkN+n9FlP1ETWJfHAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# join the dataframes containing y=1 and y=0\n",
    "df_test_under = pd.concat([df_class_0_under, df_class_1])\n",
    "\n",
    "print('Random under-sampling:')\n",
    "print(df_test_under['class'].value_counts())\n",
    "print(\"Num records = \", df_test_under.shape[0])\n",
    "\n",
    "df_test_under['class'].value_counts().plot(kind='bar', title='Count (target)');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Down Sampling - Majority Class - Using imblearn "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "imblearn over-sampling:\n",
      "0    268\n",
      "1    268\n",
      "Name: class, dtype: int64\n",
      "Num records =  536\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPyklEQVR4nO3df6zddX3H8edrVHEKs2DvaimtZVq3QRarqYjxR1hMRFhMMdkY6LAal5qFJhp/bPgj2hlZ2DJ/RiWpkVAFEaag3WQ6bDRI/AGFQQUq0ihdWwu98lvZ0MJ7f5xv4fRyb+/ve+inz0dyc8/5fL/f831fuDx7+r3nXFJVSJLa8nuDHkCSNPOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLY0gylOSnSX5/0LOMJsnh3XxDg55FTz3GXQOV5I1JNif5dZLdSf4zySvn4LyV5AXj7HYucFFV/W93zPeS/O1szzaWkeevqkeAC+nNKe3HuGtgkrwL+CTwT8BCYCnwOWDVAMcCes+KgdXAxTP4mPNm6rH6fBlY3c0rPc64ayCSPBv4CHBOVV1RVb+pqt9V1b9X1Xu7fQ5P8skkv+w+PrkvYknekuTaEY/5+LPxJBcl+WySbyZ5KMmPkzy/23ZNd8jN3d8Y/nqUEV8G3F9VO7tjzgNeBXymO+Yz3fqnkuxI8mCSG5K8qm+edUm+muTiJA8Cb0lyXJJrupm+0814cd8xJyX5QZL7k9yc5OQDnb+b7z7gpKn/21CLjLsG5eXAM4ArD7DPB+hFawXwIuBE4IOTOMeZwD8CRwHbgPMAqurV3fYXVdURVXXZKMf+GXD7vjtV9QHg+8Da7pi13abru/mOpvcs+t+SPKPvcVYBXwXmA5d0+1wHPAdYB5y9b8cki4FvAh/tHu89wNeSDB3g/ABb6f3zkR5n3DUozwF+VVV7D7DPm4CPVNWeqhqmF+qzD7D/SFdW1XXdOS6hF+GJmg88NN5OVXVxVd1TVXur6mPA4cAf9+3yw6r6elU9BgwBLwU+VFW/raprgY19+/4NcFVVXVVVj1XV1cBm4LRxxniom1d6nHHXoNwDLBjnOvQxwPa++9u7tYm6q+/2w8ARkzj2PuDI8XZK8p4kW5M8kOR+4NnAgr5ddvTdPga4t6oeHmP784C/6i7J3N893iuBReOMcSRw/3iz6tBi3DUoPwQeAU4/wD6/pBe8fZZ2awC/AZ65b0OS587wfFuAF45Y2+9XqHbX1/8eOAM4qqrmAw8AGeOY3cDRSZ7Zt7ak7/YO4EtVNb/v41lVdf5o5+/zp8DNE/iadAgx7hqIqnoA+BDw2SSnJ3lmkqclOTXJv3S7XQp8sHu9+YJu/30/fLwZOCHJiu4a97pJjnA38EcH2H4dML+7Dj7WMUcCe4FhYF6SDwF/MNYDVtV2epdZ1iV5epKXA6/v2+Vi4PVJTklyWJJnJDk5ybFjzdzNdzTwowN8LToEGXcNTHeN+l30fkg6TO+Z61rg690uH6UXwy3AT4AbuzWq6mf0Xm3zHeAOYL9XzkzAOmBDd/njjFFm+y1wEb3r4Pt8CvjLJPcl+TTwbeBbwM/oXTL6P/a/zDKaN9H7YfI93ddyGb2/wVBVO+j9APb9PPHP47088d/pyPMDvBHY0L3mXXpc/J91SKPr3vn5feDF+97INAvnuAz4aVV9eArHHk7vbzCvrqo9Mz6cDmrGXZpDSV4K3Av8Angtvb+lvLyq/nuQc6k9s/GOOUljey5wBb2Xgu4E/s6wazb4zF2SGuQPVCWpQcZdkhr0lLjmvmDBglq2bNmgx5Ckg8oNN9zwq6oa9ff5PyXivmzZMjZv3jzoMSTpoJJk+1jbvCwjSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUoKfEm5gOFsvO/eagR2jKnef/xaBHaIbfmzOrhe9Nn7lLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhL
UoOMuyQ1yLhLUoPGjXuSJUm+m+S2JLcmeUe3vi7JriQ3dR+n9R3zviTbktye5JTZ/AIkSU82kV8/sBd4d1XdmORI4IYkV3fbPlFV/9q/c5LjgTOBE4BjgO8keWFVPTqTg0uSxjbuM/eq2l1VN3a3HwK2AosPcMgq4CtV9UhV/QLYBpw4E8NKkiZmUtfckywDXgz8uFtam2RLkguTHNWtLQZ29B22kwP/YSBJmmETjnuSI4CvAe+sqgeBC4DnAyuA3cDHJnPiJGuSbE6yeXh4eDKHSpLGMaG4J3kavbBfUlVXAFTV3VX1aFU9BnyeJy697AKW9B1+bLe2n6paX1Urq2rl0NDQdL4GSdIIE3m1TIAvAFur6uN964v6dnsDcEt3eyNwZpLDkxwHLAeum7mRJUnjmcirZV4BnA38JMlN3dr7gbOSrAAKuBN4O0BV3ZrkcuA2eq+0OcdXykjS3Bo37lV1LZBRNl11gGPOA86bxlySpGnwHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNGjfuSZYk+W6S25LcmuQd3frRSa5Ockf3+ahuPUk+nWRbki1JXjLbX4QkaX8Teea+F3h3VR0PnASck+R44FxgU1UtBzZ19wFOBZZ3H2uAC2Z8aknSAY0b96raXVU3drcfArYCi4FVwIZutw3A6d3tVcAXq+dHwPwki2Z6cEnS2CZ1zT3JMuDFwI+BhVW1u9t0F7Cwu70Y2NF32M5uTZI0RyYc9yRHAF8D3llVD/Zvq6oCajInTrImyeYkm4eHhydzqCRpHBOKe5Kn0Qv7JVV1Rbd8977LLd3nPd36LmBJ3+HHdmv7qar1VbWyqlYODQ1NdX5J0igm8mqZAF8AtlbVx/s2bQRWd7dXA9/oW39z96qZk4AH+i7fSJLmwLwJ7PMK4GzgJ0lu6tbeD5wPXJ7kbcB24Ixu21XAacA24GHgrTM5sCRpfOPGvaquBTLG5teMsn8B50xzLknSNPgOVUlqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAaNG/ckFybZk+SWvrV1SXYluan7OK1v2/uSbEtye5JTZmtwSdLYJvLM/SLgdaOsf6KqVnQfVwEkOR44EzihO+ZzSQ6bqWElSRMzbtyr6hrg3gk+3irgK1X1SFX9AtgGnDiN+SRJUzCda+5rk2zpLtsc1a0tBnb07bOzW5MkzaGpxv0C4PnACmA38LHJPkCSNUk2J9k8PDw8xTEkSaOZUtyr6u6qerSqHgM+zxOXXnYBS/p2PbZbG+0x1lfVyqpaOTQ0NJUxJEljmFLckyzqu/sGYN8raTYCZyY5PMlxwHLguumNKEmarHnj7ZDkUuBkYEGSncCHgZOTrAAKuBN4O0BV3ZrkcuA2YC9wTlU9OiuTS5LGNG7cq+qsUZa/cID9zwPOm85QkqTp8R2qktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgceOe5MIke5Lc0rd2dJKrk9zRfT6qW0+STyfZlmRLkpfM5vCSpNFN5Jn7RcDrRqydC2yqquXApu4+wKnA8u5jDXDBzIwpSZqMceNeVdcA945YXgVs
6G5vAE7vW/9i9fwImJ9k0QzNKkmaoKlec19YVbu723cBC7vbi4Edffvt7NYkSXNo2j9QraoCarLHJVmTZHOSzcPDw9MdQ5LUZ6pxv3vf5Zbu855ufRewpG+/Y7u1J6mq9VW1sqpWDg0NTXEMSdJophr3jcDq7vZq4Bt962/uXjVzEvBA3+UbSdIcmTfeDkkuBU4GFiTZCXwYOB+4PMnbgO3AGd3uVwGnAduAh4G3zsLMkqRxjBv3qjprjE2vGWXfAs6Z7lCSpOnxHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNmjedg5PcCTwEPArsraqVSY4GLgOWAXcCZ1TVfdMbU5I0GTPxzP3Pq2pFVa3s7p8LbKqq5cCm7r4kaQ7NxmWZVcCG7vYG4PRZOIck6QCmG/cC/ivJDUnWdGsLq2p3d/suYOE0zyFJmqRpXXMHXllVu5L8IXB1kp/2b6yqSlKjHdj9YbAGYOnSpdMcQ5LUb1rP3KtqV/d5D3AlcCJwd5JFAN3nPWMcu76qVlbVyqGhoemMIUkaYcpxT/KsJEfuuw28FrgF2Ais7nZbDXxjukNKkiZnOpdlFgJXJtn3OF+uqm8luR64PMnbgO3AGdMfU5I0GVOOe1X9HHjRKOv3AK+ZzlCSpOnxHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNmrW4J3ldktuTbEty7mydR5L0ZLMS9ySHAZ8FTgWOB85KcvxsnEuS9GSz9cz9RGBbVf28qn4LfAVYNUvnkiSNMG+WHncxsKPv/k7gZf07JFkDrOnu/jrJ7bM0y6FoAfCrQQ8xnvzzoCfQAPi9ObOeN9aG2Yr7uKpqPbB+UOdvWZLNVbVy0HNII/m9OXdm67LMLmBJ3/1juzVJ0hyYrbhfDyxPclySpwNnAhtn6VySpBFm5bJMVe1Nshb4NnAYcGFV3Tob59KovNylpyq/N+dIqmrQM0iSZpjvUJWkBhl3SWqQcZekBg3sde6aOUn+hN47gBd3S7uAjVW1dXBTSRokn7kf5JL8A71f7xDguu4jwKX+wjY9VSV566BnaJ2vljnIJfkZcEJV/W7E+tOBW6tq+WAmk8aW5H+qaumg52iZl2UOfo8BxwDbR6wv6rZJA5Fky1ibgIVzOcuhyLgf/N4JbEpyB0/8sralwAuAtYMaSqIX8FOA+0asB/jB3I9zaDHuB7mq+laSF9L7Ncv9P1C9vqoeHdxkEv8BHFFVN43ckOR7cz7NIcZr7pLUIF8tI0kNMu6S1CDjLkkNMu6S1CDjLkkN+n9FlP1ETWJfHAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from imblearn.under_sampling import RandomUnderSampler\n",
    "\n",
    "# Separate the data into descriptive and target attributes\n",
    "X = data.drop('class', axis=1)\n",
    "Y = data['class']\n",
    "\n",
    "rus = RandomUnderSampler(random_state=42, replacement=True)\n",
    "X_rus, Y_rus = rus.fit_resample(X, Y)\n",
    "\n",
    "df_rus = pd.concat([pd.DataFrame(X_rus), pd.DataFrame(Y_rus, columns=['class'])], axis=1)\n",
    "\n",
    "print('imblearn over-sampling:')\n",
    "print(df_rus['class'].value_counts())\n",
    "print(\"Num records = \", df_rus.shape[0])\n",
    "\n",
    "df_rus['class'].value_counts().plot(kind='bar', title='Count (target)');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We should get the same class counts as in the previous approach, although the specific records selected may differ."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Down/Under sampling the majority class y=0 using Sci-Kit Learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original Data distribution\n",
      "0    500\n",
      "1    268\n",
      "Name: class, dtype: int64\n",
      "Sci-Kit Learn : resample : Down Sampled data set\n",
      "0    268\n",
      "1    268\n",
      "Name: class, dtype: int64\n",
      "Num records =  536\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPyklEQVR4nO3df6zddX3H8edrVHEKs2DvaimtZVq3QRarqYjxR1hMRFhMMdkY6LAal5qFJhp/bPgj2hlZ2DJ/RiWpkVAFEaag3WQ6bDRI/AGFQQUq0ihdWwu98lvZ0MJ7f5xv4fRyb+/ve+inz0dyc8/5fL/f831fuDx7+r3nXFJVSJLa8nuDHkCSNPOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLY0gylOSnSX5/0LOMJsnh3XxDg55FTz3GXQOV5I1JNif5dZLdSf4zySvn4LyV5AXj7HYucFFV/W93zPeS/O1szzaWkeevqkeAC+nNKe3HuGtgkrwL+CTwT8BCYCnwOWDVAMcCes+KgdXAxTP4mPNm6rH6fBlY3c0rPc64ayCSPBv4CHBOVV1RVb+pqt9V1b9X1Xu7fQ5P8skkv+w+PrkvYknekuTaEY/5+LPxJBcl+WySbyZ5KMmPkzy/23ZNd8jN3d8Y/nqUEV8G3F9VO7tjzgNeBXymO+Yz3fqnkuxI8mCSG5K8qm+edUm+muTiJA8Cb0lyXJJrupm+0814cd8xJyX5QZL7k9yc5OQDnb+b7z7gpKn/21CLjLsG5eXAM4ArD7DPB+hFawXwIuBE4IOTOMeZwD8CRwHbgPMAqurV3fYXVdURVXXZKMf+GXD7vjtV9QHg+8Da7pi13abru/mOpvcs+t+SPKPvcVYBXwXmA5d0+1wHPAdYB5y9b8cki4FvAh/tHu89wNeSDB3g/ABb6f3zkR5n3DUozwF+VVV7D7DPm4CPVNWeqhqmF+qzD7D/SFdW1XXdOS6hF+GJmg88NN5OVXVxVd1TVXur6mPA4cAf9+3yw6r6elU9BgwBLwU+VFW/raprgY19+/4NcFVVXVVVj1XV1cBm4LRxxniom1d6nHHXoNwDLBjnOvQxwPa++9u7tYm6q+/2w8ARkzj2PuDI8XZK8p4kW5M8kOR+4NnAgr5ddvTdPga4t6oeHmP784C/6i7J3N893iuBReOMcSRw/3iz6tBi3DUoPwQeAU4/wD6/pBe8fZZ2awC/AZ65b0OS587wfFuAF45Y2+9XqHbX1/8eOAM4qqrmAw8AGeOY3cDRSZ7Zt7ak7/YO4EtVNb/v41lVdf5o5+/zp8DNE/iadAgx7hqIqnoA+BDw2SSnJ3lmkqclOTXJv3S7XQp8sHu9+YJu/30/fLwZOCHJiu4a97pJjnA38EcH2H4dML+7Dj7WMUcCe4FhYF6SDwF/MNYDVtV2epdZ1iV5epKXA6/v2+Vi4PVJTklyWJJnJDk5ybFjzdzNdzTwowN8LToEGXcNTHeN+l30fkg6TO+Z61rg690uH6UXwy3AT4AbuzWq6mf0Xm3zHeAOYL9XzkzAOmBDd/njjFFm+y1wEb3r4Pt8CvjLJPcl+TTwbeBbwM/oXTL6P/a/zDKaN9H7YfI93ddyGb2/wVBVO+j9APb9PPHP47088d/pyPMDvBHY0L3mXXpc/J91SKPr3vn5feDF+97INAvnuAz4aVV9eArHHk7vbzCvrqo9Mz6cDmrGXZpDSV4K3Av8Angtvb+lvLyq/nuQc6k9s/GOOUljey5wBb2Xgu4E/s6wazb4zF2SGuQPVCWpQcZdkhr0lLjmvmDBglq2bNmgx5Ckg8oNN9zwq6oa9ff5PyXivmzZMjZv3jzoMSTpoJJk+1jbvCwjSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUoKfEm5gOFsvO/eagR2jKnef/xaBHaIbfmzOrhe9Nn7lLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhL
UoOMuyQ1yLhLUoPGjXuSJUm+m+S2JLcmeUe3vi7JriQ3dR+n9R3zviTbktye5JTZ/AIkSU82kV8/sBd4d1XdmORI4IYkV3fbPlFV/9q/c5LjgTOBE4BjgO8keWFVPTqTg0uSxjbuM/eq2l1VN3a3HwK2AosPcMgq4CtV9UhV/QLYBpw4E8NKkiZmUtfckywDXgz8uFtam2RLkguTHNWtLQZ29B22kwP/YSBJmmETjnuSI4CvAe+sqgeBC4DnAyuA3cDHJnPiJGuSbE6yeXh4eDKHSpLGMaG4J3kavbBfUlVXAFTV3VX1aFU9BnyeJy697AKW9B1+bLe2n6paX1Urq2rl0NDQdL4GSdIIE3m1TIAvAFur6uN964v6dnsDcEt3eyNwZpLDkxwHLAeum7mRJUnjmcirZV4BnA38JMlN3dr7gbOSrAAKuBN4O0BV3ZrkcuA2eq+0OcdXykjS3Bo37lV1LZBRNl11gGPOA86bxlySpGnwHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNGjfuSZYk+W6S25LcmuQd3frRSa5Ockf3+ahuPUk+nWRbki1JXjLbX4QkaX8Teea+F3h3VR0PnASck+R44FxgU1UtBzZ19wFOBZZ3H2uAC2Z8aknSAY0b96raXVU3drcfArYCi4FVwIZutw3A6d3tVcAXq+dHwPwki2Z6cEnS2CZ1zT3JMuDFwI+BhVW1u9t0F7Cwu70Y2NF32M5uTZI0RyYc9yRHAF8D3llVD/Zvq6oCajInTrImyeYkm4eHhydzqCRpHBOKe5Kn0Qv7JVV1Rbd8977LLd3nPd36LmBJ3+HHdmv7qar1VbWyqlYODQ1NdX5J0igm8mqZAF8AtlbVx/s2bQRWd7dXA9/oW39z96qZk4AH+i7fSJLmwLwJ7PMK4GzgJ0lu6tbeD5wPXJ7kbcB24Ixu21XAacA24GHgrTM5sCRpfOPGvaquBTLG5teMsn8B50xzLknSNPgOVUlqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAYZd0lqkHGXpAaNG/ckFybZk+SWvrV1SXYluan7OK1v2/uSbEtye5JTZmtwSdLYJvLM/SLgdaOsf6KqVnQfVwEkOR44EzihO+ZzSQ6bqWElSRMzbtyr6hrg3gk+3irgK1X1SFX9AtgGnDiN+SRJUzCda+5rk2zpLtsc1a0tBnb07bOzW5MkzaGpxv0C4PnACmA38LHJPkCSNUk2J9k8PDw8xTEkSaOZUtyr6u6qerSqHgM+zxOXXnYBS/p2PbZbG+0x1lfVyqpaOTQ0NJUxJEljmFLckyzqu/sGYN8raTYCZyY5PMlxwHLguumNKEmarHnj7ZDkUuBkYEGSncCHgZOTrAAKuBN4O0BV3ZrkcuA2YC9wTlU9OiuTS5LGNG7cq+qsUZa/cID9zwPOm85QkqTp8R2qktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgceOe5MIke5Lc0rd2dJKrk9zRfT6qW0+STyfZlmRLkpfM5vCSpNFN5Jn7RcDrRqydC2yqquXApu4+wKnA8u5jDXDBzIwpSZqMceNeVdcA945YXgVs
6G5vAE7vW/9i9fwImJ9k0QzNKkmaoKlec19YVbu723cBC7vbi4Edffvt7NYkSXNo2j9QraoCarLHJVmTZHOSzcPDw9MdQ5LUZ6pxv3vf5Zbu855ufRewpG+/Y7u1J6mq9VW1sqpWDg0NTXEMSdJophr3jcDq7vZq4Bt962/uXjVzEvBA3+UbSdIcmTfeDkkuBU4GFiTZCXwYOB+4PMnbgO3AGd3uVwGnAduAh4G3zsLMkqRxjBv3qjprjE2vGWXfAs6Z7lCSpOnxHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNmjedg5PcCTwEPArsraqVSY4GLgOWAXcCZ1TVfdMbU5I0GTPxzP3Pq2pFVa3s7p8LbKqq5cCm7r4kaQ7NxmWZVcCG7vYG4PRZOIck6QCmG/cC/ivJDUnWdGsLq2p3d/suYOE0zyFJmqRpXXMHXllVu5L8IXB1kp/2b6yqSlKjHdj9YbAGYOnSpdMcQ5LUb1rP3KtqV/d5D3AlcCJwd5JFAN3nPWMcu76qVlbVyqGhoemMIUkaYcpxT/KsJEfuuw28FrgF2Ais7nZbDXxjukNKkiZnOpdlFgJXJtn3OF+uqm8luR64PMnbgO3AGdMfU5I0GVOOe1X9HHjRKOv3AK+ZzlCSpOnxHaqS1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNmrW4J3ldktuTbEty7mydR5L0ZLMS9ySHAZ8FTgWOB85KcvxsnEuS9GSz9cz9RGBbVf28qn4LfAVYNUvnkiSNMG+WHncxsKPv/k7gZf07JFkDrOnu/jrJ7bM0y6FoAfCrQQ8xnvzzoCfQAPi9ObOeN9aG2Yr7uKpqPbB+UOdvWZLNVbVy0HNII/m9OXdm67LMLmBJ3/1juzVJ0hyYrbhfDyxPclySpwNnAhtn6VySpBFm5bJMVe1Nshb4NnAYcGFV3Tob59KovNylpyq/N+dIqmrQM0iSZpjvUJWkBhl3SWqQcZekBg3sde6aOUn+hN47gBd3S7uAjVW1dXBTSRokn7kf5JL8A71f7xDguu4jwKX+wjY9VSV566BnaJ2vljnIJfkZcEJV/W7E+tOBW6tq+WAmk8aW5H+qaumg52iZl2UOfo8BxwDbR6wv6rZJA5Fky1ibgIVzOcuhyLgf/N4JbEpyB0/8sralwAuAtYMaSqIX8FOA+0asB/jB3I9zaDHuB7mq+laSF9L7Ncv9P1C9vqoeHdxkEv8BHFFVN43ckOR7cz7NIcZr7pLUIF8tI0kNMu6S1CDjLkkNMu6S1CDjLkkN+n9FlP1ETWJfHAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sklearn.utils import resample\n",
    "\n",
    "print(\"Original Data distribution\")\n",
    "print(data['class'].value_counts())\n",
    "\n",
    "# Down Sample Majority class\n",
    "down_sample = resample(data[data['class']==0],\n",
    "  replace = True, # sample with replacement\n",
    "  n_samples = data[data['class']==1].shape[0], # to match minority class\n",
    "  random_state=42) # reproducible results\n",
    "\n",
    "# Combine minority class with downsampled majority class\n",
    "train_downsample = pd.concat([data[data['class']==1], down_sample])\n",
    "\n",
    "# Display new class counts\n",
    "print('Sci-Kit Learn : resample : Down Sampled data set')\n",
    "print(train_downsample['class'].value_counts())\n",
    "print(\"Num records = \", train_downsample.shape[0])\n",
    "train_downsample['class'].value_counts().plot(kind='bar', title='Count (target)');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Oversampling the minority class y=1 (using random sampling)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random over-sampling:\n",
      "0    500\n",
      "1    500\n",
      "Name: class, dtype: int64\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQKklEQVR4nO3df6zddX3H8edrVECFUaB3FVuwbNQ5zAKaihB/xEk2BefKH8pQp5WwNFkg0TB/MDWKRhdcsoFmzqwZhir+gKFIp0yHKFGj/CgKKKLSMVlbgVZoK8r8gbz3x/kUTq/39t62995DP30+kpPz+X4+n+/5vk97++r3fs73nJOqQpLUl98ZdQGSpJlnuEtShwx3SeqQ4S5JHTLcJalDhrskdchwlyaRZCzJ95M8cdS1TCTJAa2+sVHXoscfw10jleTVSdYm+VmSe5L8Z5Lnz8FxK8kxU0w7D7ikqv6v7XNdkr+e7domM/74VfVL4CMM6pR2YLhrZJKcC1wE/D2wEDgK+Bdg+QjLAgZnxcAK4NIZfMx5M/VYQz4BrGj1So8y3DUSSQ4B3gOcXVWfqaqfV9Wvq+o/qurNbc4BSS5K8uN2u2h7iCV5fZKvj3vMR8/Gk1yS5ENJPp/kwSQ3JPmDNvbVtsut7TeGv5ygxOcCW6tqQ9vnfcALgH9u+/xz6/9AkvVJfprk5iQvGKrn/CRXJLk0yU+B1yc5OslXW01fajVeOrTPiUm+kWRrkluTvGhnx2/1bQFO3P2/DfXIcNeonAQcCFy5kzlvZxBaxwPHAScA79iFY5wBvBs4FFgHvA+gql7Yxo+rqoOq6rIJ9v1j4AfbN6rq7cDXgHPaPue0oZtafYcxOIv+9yQHDj3OcuAKYD7w8TbnRuBw4HzgtdsnJlkEfB54b3u8NwGfTjK2k+MD3MHgz0d6lOGuUTkc+ElVPbyTOa8B3lNVm6pqM4Ogfu1O5o93ZVXd2I7xcQYhPF3zgQenmlRVl1bV/VX1cFX9I3AA8IdDU75ZVZ+tqkeAMeA5wDur6ldV9XVgzdDcvwKurqqrq+qRqroGWAucOkUZD7Z6pUcZ7hqV+4EFU6xDPxW4e2j77tY3XfcOtR8CDtqFfbcAB081KcmbktyRZFuSrcAhwIKhKeuH2k8FHqiqhyYZfxrwyrYks7U93vOBI6Yo42Bg61S1at9iuGtUvgn8EjhtJ3N+zCDwtjuq9QH8HHjS9oEkT5nh+m4Dnj6ub4ePUG3r628BTgcOrar5wDYgk+xzD3BYkicN9R051F4PfKyq5g/dnlxVF0x0/CF/BNw6jeekfYjhrpGoqm3AO4EPJTktyZOSPCHJKUn+oU37JPCOdr35gjZ/+4uPtwLPTHJ8W+M+fxdLuA/4/Z2M3wjMb+vgk+1zMPAwsBmYl+SdwO9O9oBVdTeDZZbzk+yf5CTg5UNTLgVenuQlSfZLcmCSFyVZPFnNrb7DgOt38ly0DzLcNTJtjfpcBi+SbmZw5noO8Nk25b0MwvA24DvAt1ofVfVDBlfbfAm4E9jhyplpOB9Y3ZY/Tp+gtl8BlzBYB9/uA8ArkmxJ8kHgi8AXgB8yWDL6BTsus0zkNQxeTL6/PZfLGPwGQ1WtZ/AC7Nt47M/jzTz273T88QFeDaxu17xLj4pf1iFNrL3z82vAs7a/kWkWjnEZ8P2qetdu7HsAg99gXlhVm2a8OO3VDHdpDiV5DvAA8D/AnzH4LeWkqvr2KOtSf2bjHXOSJvcU4DMMLgXdAPyNwa7Z4Jm7JHXIF1QlqUOGuyR16HGx5r5gwYJasmTJqMuQpL3KzTff/JOqmvDz/B8X4b5kyRLWrl076jIkaa+S5O7JxlyWkaQOGe6S1CHDXZI6ZLhLUocMd0nq0LTCPcmPknwnyS1J1ra+w5Jck+TOdn9o60+SDyZZl+S2JM+ezScgSfptu3Lm/idVdXxVLWvb5wHXVtVS4Nq2DXAKsLTdVgIfnqliJUnT
syfLMsuB1a29mse+UWc58NEauJ7BFx5M9TVhkqQZNN03MRXwX0kK+NeqWgUsrKp72vi9wMLWXsSOX1iwofXdM9RHkpUMzuw56qijdq/6ObbkvM+PuoSu/OiCl426hG74szmzevjZnG64P7+qNib5PeCaJN8fHqyqasE/be0/iFUAy5Yt86MpJWkGTWtZpqo2tvtNwJXACcB925db2v32b4LZyI5f+ru49UmS5siU4Z7kyUkO3t5m8O0x3wXWACvatBXAVa29Bnhdu2rmRGDb0PKNJGkOTGdZZiFwZZLt8z9RVV9IchNweZKzGHw58PYvGb4aOBVYBzwEnDnjVUuSdmrKcK+qu4DjJui/Hzh5gv4Czp6R6iRJu8V3qEpShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6NO1wT7Jfkm8n+VzbPjrJDUnWJbksyf6t/4C2va6NL5ml2iVJk9iVM/c3AHcMbb8fuLCqjgG2AGe1/rOALa3/wjZPkjSHphXuSRYDLwP+rW0HeDFwRZuyGjittZe3bdr4yW2+JGmOTPfM/SLgLcAjbftwYGtVPdy2NwCLWnsRsB6gjW9r8yVJc2TKcE/y58Cmqrp5Jg+cZGWStUnWbt68eSYfWpL2edM5c38e8BdJfgR8isFyzAeA+UnmtTmLgY2tvRE4EqCNHwLcP/5Bq2pVVS2rqmVjY2N79CQkSTuaMtyr6u+qanFVLQHOAL5cVa8BvgK8ok1bAVzV2mvaNm38y1VVM1q1JGmn9uQ697cC5yZZx2BN/eLWfzFweOs/Fzhvz0qUJO2qeVNPeUxVXQdc19p3ASdMMOcXwCtnoDZJ0m7yHaqS1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDk0Z7kkOTHJjkluT3J7k3a3/6CQ3JFmX5LIk+7f+A9r2uja+ZJafgyRpnOmcuf8SeHFVHQccD7w0yYnA+4ELq+oYYAtwVpt/FrCl9V/Y5kmS5tCU4V4DP2ubT2i3Al4MXNH6VwOntfbytk0bPzlJZqpgSdLUprXmnmS/JLcAm4BrgP8GtlbVw23KBmBRay8C1gO08W3A4TNYsyRpCtMK96r6TVUdDywGTgCesacHTrIyydokazdv3rynDydJGrJLV8tU1VbgK8BJwPwk89rQYmBja28EjgRo44cA90/wWKuqallVLRsbG9u96iVJE5rO1TJjSea39hOBPwXuYBDyr2jTVgBXtfaatk0b/3JV1QzWLEmawrypp3AEsDrJfgz+M7i8qj6X5HvAp5K8F/g2cHGbfzHwsSTrgAeAM2ahbknSTkwZ7lV1G/CsCfrvYrD+Pr7/F8ArZ6Q6SdJu8R2qktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUoSnDPcmRSb6S5HtJbk/yhtZ/WJJrktzZ7g9t/UnywSTrktyW5Nmz/SQkSTuazpn7w8DfVtWxwInA2UmOBc4Drq2qpcC1bRvgFGBpu60EPjzjVUuSdmrKcK+qe6rqW639IHAHsAhY
Dqxu01YDp7X2cuCjNXA9MD/JETNduCRpcru05p5kCfAs4AZgYVXd04buBRa29iJg/dBuG1qfJGmOTDvckxwEfBp4Y1X9dHisqgqoXTlwkpVJ1iZZu3nz5l3ZVZI0hWmFe5InMAj2j1fVZ1r3fduXW9r9pta/EThyaPfFrW8HVbWqqpZV1bKxsbHdrV+SNIHpXC0T4GLgjqr6p6GhNcCK1l4BXDXU/7p21cyJwLah5RtJ0hyYN405zwNeC3wnyS2t723ABcDlSc4C7gZOb2NXA6cC64CHgDNnsmBJ0tSmDPeq+jqQSYZPnmB+AWfvYV2SpD3gO1QlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHZoy3JN8JMmmJN8d6jssyTVJ7mz3h7b+JPlgknVJbkvy7NksXpI0semcuV8CvHRc33nAtVW1FLi2bQOcAixtt5XAh2emTEnSrpgy3Kvqq8AD47qXA6tbezVw2lD/R2vgemB+kiNmqFZJ0jTt7pr7wqq6p7XvBRa29iJg/dC8Da1PkjSH9vgF1aoqoHZ1vyQrk6xNsnbz5s17WoYkacjuhvt925db2v2m1r8ROHJo3uLW91uqalVVLauqZWNjY7tZhiRpIrsb7muAFa29ArhqqP917aqZE4FtQ8s3kqQ5Mm+qCUk+CbwIWJBkA/Au4ALg8iRnAXcDp7fpVwOnAuuAh4AzZ6FmSdIUpgz3qnrVJEMnTzC3gLP3tChJ0p7xHaqS1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktShWQn3JC9N8oMk65KcNxvHkCRNbsbDPcl+wIeAU4BjgVclOXamjyNJmtxsnLmfAKyrqruq6lfAp4Dls3AcSdIk5s3CYy4C1g9tbwCeO35SkpXAyrb5syQ/mIVa9lULgJ+Muoip5P2jrkAj4M/mzHraZAOzEe7TUlWrgFWjOn7PkqytqmWjrkMaz5/NuTMbyzIbgSOHthe3PknSHJmNcL8JWJrk6CT7A2cAa2bhOJKkScz4skxVPZzkHOCLwH7AR6rq9pk+jnbK5S49XvmzOUdSVaOuQZI0w3yHqiR1yHCXpA4Z7pLUoZFd566ZkeQZDN4BvKh1bQTWVNUdo6tK0qh55r4XS/JWBh/vEODGdgvwST+wTY9nSc4cdQ2982qZvViSHwLPrKpfj+vfH7i9qpaOpjJp55L8b1UdNeo6euayzN7tEeCpwN3j+o9oY9LIJLltsiFg4VzWsi8y3PdubwSuTXInj31Y21HAMcA5oypKahYCLwG2jOsP8I25L2ffYrjvxarqC0mezuBjlodfUL2pqn4zusokAD4HHFRVt4wfSHLdnFezj3HNXZI65NUyktQhw12SOmS4S1KHDHdJ6pDhLkkd+n+rPQ6LBFTagQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_class_1_over = df_class_1.sample(count_class_0, replace=True)\n",
    "\n",
    "df_test_over = pd.concat([df_class_0, df_class_1_over], axis=0)\n",
    "\n",
    "print('Random over-sampling:')\n",
    "print(df_test_over['class'].value_counts())\n",
    "\n",
    "df_test_over['class'].value_counts().plot(kind='bar', title='Count (target)');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Oversampling the minority class y=1 using SMOTE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    500\n",
      "1    268\n",
      "Name: class, dtype: int64\n",
      "SMOTE over-sampling:\n",
      "0    500\n",
      "1    500\n",
      "Name: class, dtype: int64\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEFCAYAAAAYKqc0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQKklEQVR4nO3df6zddX3H8edrVECFUaB3FVuwbNQ5zAKaihB/xEk2BefKH8pQp5WwNFkg0TB/MDWKRhdcsoFmzqwZhir+gKFIp0yHKFGj/CgKKKLSMVlbgVZoK8r8gbz3x/kUTq/39t62995DP30+kpPz+X4+n+/5vk97++r3fs73nJOqQpLUl98ZdQGSpJlnuEtShwx3SeqQ4S5JHTLcJalDhrskdchwlyaRZCzJ95M8cdS1TCTJAa2+sVHXoscfw10jleTVSdYm+VmSe5L8Z5Lnz8FxK8kxU0w7D7ikqv6v7XNdkr+e7domM/74VfVL4CMM6pR2YLhrZJKcC1wE/D2wEDgK+Bdg+QjLAgZnxcAK4NIZfMx5M/VYQz4BrGj1So8y3DUSSQ4B3gOcXVWfqaqfV9Wvq+o/qurNbc4BSS5K8uN2u2h7iCV5fZKvj3vMR8/Gk1yS5ENJPp/kwSQ3JPmDNvbVtsut7TeGv5ygxOcCW6tqQ9vnfcALgH9u+/xz6/9AkvVJfprk5iQvGKrn/CRXJLk0yU+B1yc5OslXW01fajVeOrTPiUm+kWRrkluTvGhnx2/1bQFO3P2/DfXIcNeonAQcCFy5kzlvZxBaxwPHAScA79iFY5wBvBs4FFgHvA+gql7Yxo+rqoOq6rIJ9v1j4AfbN6rq7cDXgHPaPue0oZtafYcxOIv+9yQHDj3OcuAKYD7w8TbnRuBw4HzgtdsnJlkEfB54b3u8NwGfTjK2k+MD3MHgz0d6lOGuUTkc+ElVPbyTOa8B3lNVm6pqM4Ogfu1O5o93ZVXd2I7xcQYhPF3zgQenmlRVl1bV/VX1cFX9I3AA8IdDU75ZVZ+tqkeAMeA5wDur6ldV9XVgzdDcvwKurqqrq+qRqroGWAucOkUZD7Z6pUcZ7hqV+4EFU6xDPxW4e2j77tY3XfcOtR8CDtqFfbcAB081KcmbktyRZFuSrcAhwIKhKeuH2k8FHqiqhyYZfxrwyrYks7U93vOBI6Yo42Bg61S1at9iuGtUvgn8EjhtJ3N+zCDwtjuq9QH8HHjS9oEkT5nh+m4Dnj6ub4ePUG3r628BTgcOrar5wDYgk+xzD3BYkicN9R051F4PfKyq5g/dnlxVF0x0/CF/BNw6jeekfYjhrpGoqm3AO4EPJTktyZOSPCHJKUn+oU37JPCOdr35gjZ/+4uPtwLPTHJ8W+M+fxdLuA/4/Z2M3wjMb+vgk+1zMPAwsBmYl+SdwO9O9oBVdTeDZZbzk+yf5CTg5UNTLgVenuQlSfZLcmCSFyVZPFnNrb7DgOt38ly0DzLcNTJtjfpcBi+SbmZw5noO8Nk25b0MwvA24DvAt1ofVfVDBlfbfAm4E9jhyplpOB9Y3ZY/Tp+gtl8BlzBYB9/uA8ArkmxJ8kHgi8AXgB8yWDL6BTsus0zkNQxeTL6/PZfLGPwGQ1WtZ/AC7Nt47M/jzTz273T88QFeDaxu17xLj4pf1iFNrL3z82vAs7a/kWkWjnEZ8P2qetdu7HsAg99gXlhVm2a8OO3VDHdpDiV5DvAA8D/AnzH4LeWkqvr2KOtSf2bjHXOSJvcU4DMMLgXdAPyNwa7Z4Jm7JHXIF1QlqUOGuyR16HGx5r5gwYJasmTJqMuQpL3KzTff/JOqmvDz/B8X4b5kyRLWrl076jIkaa+S5O7JxlyWkaQOGe6S1CHDXZI6ZLhLUocMd0nq0LTCPcmPknwnyS1J1ra+w5Jck+TOdn9o60+SDyZZl+S2JM+ezScgSfptu3Lm/idVdXxVLWvb5wHXVtVS4Nq2DXAKsLTdVgIfnqliJUnT
syfLMsuB1a29mse+UWc58NEauJ7BFx5M9TVhkqQZNN03MRXwX0kK+NeqWgUsrKp72vi9wMLWXsSOX1iwofXdM9RHkpUMzuw56qijdq/6ObbkvM+PuoSu/OiCl426hG74szmzevjZnG64P7+qNib5PeCaJN8fHqyqasE/be0/iFUAy5Yt86MpJWkGTWtZpqo2tvtNwJXACcB925db2v32b4LZyI5f+ru49UmS5siU4Z7kyUkO3t5m8O0x3wXWACvatBXAVa29Bnhdu2rmRGDb0PKNJGkOTGdZZiFwZZLt8z9RVV9IchNweZKzGHw58PYvGb4aOBVYBzwEnDnjVUuSdmrKcK+qu4DjJui/Hzh5gv4Czp6R6iRJu8V3qEpShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6NO1wT7Jfkm8n+VzbPjrJDUnWJbksyf6t/4C2va6NL5ml2iVJk9iVM/c3AHcMbb8fuLCqjgG2AGe1/rOALa3/wjZPkjSHphXuSRYDLwP+rW0HeDFwRZuyGjittZe3bdr4yW2+JGmOTPfM/SLgLcAjbftwYGtVPdy2NwCLWnsRsB6gjW9r8yVJc2TKcE/y58Cmqrp5Jg+cZGWStUnWbt68eSYfWpL2edM5c38e8BdJfgR8isFyzAeA+UnmtTmLgY2tvRE4EqCNHwLcP/5Bq2pVVS2rqmVjY2N79CQkSTuaMtyr6u+qanFVLQHOAL5cVa8BvgK8ok1bAVzV2mvaNm38y1VVM1q1JGmn9uQ697cC5yZZx2BN/eLWfzFweOs/Fzhvz0qUJO2qeVNPeUxVXQdc19p3ASdMMOcXwCtnoDZJ0m7yHaqS1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDk0Z7kkOTHJjkluT3J7k3a3/6CQ3JFmX5LIk+7f+A9r2uja+ZJafgyRpnOmcuf8SeHFVHQccD7w0yYnA+4ELq+oYYAtwVpt/FrCl9V/Y5kmS5tCU4V4DP2ubT2i3Al4MXNH6VwOntfbytk0bPzlJZqpgSdLUprXmnmS/JLcAm4BrgP8GtlbVw23KBmBRay8C1gO08W3A4TNYsyRpCtMK96r6TVUdDywGTgCesacHTrIyydokazdv3rynDydJGrJLV8tU1VbgK8BJwPwk89rQYmBja28EjgRo44cA90/wWKuqallVLRsbG9u96iVJE5rO1TJjSea39hOBPwXuYBDyr2jTVgBXtfaatk0b/3JV1QzWLEmawrypp3AEsDrJfgz+M7i8qj6X5HvAp5K8F/g2cHGbfzHwsSTrgAeAM2ahbknSTkwZ7lV1G/CsCfrvYrD+Pr7/F8ArZ6Q6SdJu8R2qktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUoSnDPcmRSb6S5HtJbk/yhtZ/WJJrktzZ7g9t/UnywSTrktyW5Nmz/SQkSTuazpn7w8DfVtWxwInA2UmOBc4Drq2qpcC1bRvgFGBpu60EPjzjVUuSdmrKcK+qe6rqW639IHAHsAhY
Dqxu01YDp7X2cuCjNXA9MD/JETNduCRpcru05p5kCfAs4AZgYVXd04buBRa29iJg/dBuG1qfJGmOTDvckxwEfBp4Y1X9dHisqgqoXTlwkpVJ1iZZu3nz5l3ZVZI0hWmFe5InMAj2j1fVZ1r3fduXW9r9pta/EThyaPfFrW8HVbWqqpZV1bKxsbHdrV+SNIHpXC0T4GLgjqr6p6GhNcCK1l4BXDXU/7p21cyJwLah5RtJ0hyYN405zwNeC3wnyS2t723ABcDlSc4C7gZOb2NXA6cC64CHgDNnsmBJ0tSmDPeq+jqQSYZPnmB+AWfvYV2SpD3gO1QlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHZoy3JN8JMmmJN8d6jssyTVJ7mz3h7b+JPlgknVJbkvy7NksXpI0semcuV8CvHRc33nAtVW1FLi2bQOcAixtt5XAh2emTEnSrpgy3Kvqq8AD47qXA6tbezVw2lD/R2vgemB+kiNmqFZJ0jTt7pr7wqq6p7XvBRa29iJg/dC8Da1PkjSH9vgF1aoqoHZ1vyQrk6xNsnbz5s17WoYkacjuhvt925db2v2m1r8ROHJo3uLW91uqalVVLauqZWNjY7tZhiRpIrsb7muAFa29ArhqqP917aqZE4FtQ8s3kqQ5Mm+qCUk+CbwIWJBkA/Au4ALg8iRnAXcDp7fpVwOnAuuAh4AzZ6FmSdIUpgz3qnrVJEMnTzC3gLP3tChJ0p7xHaqS1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktShWQn3JC9N8oMk65KcNxvHkCRNbsbDPcl+wIeAU4BjgVclOXamjyNJmtxsnLmfAKyrqruq6lfAp4Dls3AcSdIk5s3CYy4C1g9tbwCeO35SkpXAyrb5syQ/mIVa9lULgJ+Muoip5P2jrkAj4M/mzHraZAOzEe7TUlWrgFWjOn7PkqytqmWjrkMaz5/NuTMbyzIbgSOHthe3PknSHJmNcL8JWJrk6CT7A2cAa2bhOJKkScz4skxVPZzkHOCLwH7AR6rq9pk+jnbK5S49XvmzOUdSVaOuQZI0w3yHqiR1yHCXpA4Z7pLUoZFd566ZkeQZDN4BvKh1bQTWVNUdo6tK0qh55r4XS/JWBh/vEODGdgvwST+wTY9nSc4cdQ2982qZvViSHwLPrKpfj+vfH7i9qpaOpjJp55L8b1UdNeo6euayzN7tEeCpwN3j+o9oY9LIJLltsiFg4VzWsi8y3PdubwSuTXInj31Y21HAMcA5oypKahYCLwG2jOsP8I25L2ffYrjvxarqC0mezuBjlodfUL2pqn4zusokAD4HHFRVt4wfSHLdnFezj3HNXZI65NUyktQhw12SOmS4S1KHDHdJ6pDhLkkd+n+rPQ6LBFTagQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from imblearn.over_sampling import SMOTE\n",
    "\n",
    "print(data['class'].value_counts())\n",
    "X = data.drop('class', axis=1)\n",
    "Y = data['class']\n",
    "\n",
    "sm = SMOTE(random_state=42)\n",
    "X_res, Y_res = sm.fit_resample(X, Y)\n",
    "\n",
    "df_smote_over = pd.concat([pd.DataFrame(X_res), pd.DataFrame(Y_res, columns=['class'])], axis=1)\n",
    "\n",
    "print('SMOTE over-sampling:')\n",
    "print(df_smote_over['class'].value_counts())\n",
    "\n",
    "df_smote_over['class'].value_counts().plot(kind='bar', title='Count (target)');"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}