Shiva7706 committed on
Commit
ac69e74
·
1 Parent(s): 5556346

Delete Movie-recommendation-system

Browse files
Movie-recommendation-system/README.md DELETED
@@ -1,2 +0,0 @@
1
- # Movie-recommendation-system
2
- Movie Recommendation System, my first ML project
 
 
 
Movie-recommendation-system/main.ipynb DELETED
@@ -1,1393 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pandas as pd\n"
10
- ]
11
- },
12
- {
13
- "cell_type": "code",
14
- "execution_count": 2,
15
- "metadata": {},
16
- "outputs": [],
17
- "source": [
18
- "movies = pd.read_csv('top10K-TMDB-movies.csv')"
19
- ]
20
- },
21
- {
22
- "cell_type": "code",
23
- "execution_count": 3,
24
- "metadata": {},
25
- "outputs": [
26
- {
27
- "data": {
28
- "text/html": [
29
- "<div>\n",
30
- "<style scoped>\n",
31
- " .dataframe tbody tr th:only-of-type {\n",
32
- " vertical-align: middle;\n",
33
- " }\n",
34
- "\n",
35
- " .dataframe tbody tr th {\n",
36
- " vertical-align: top;\n",
37
- " }\n",
38
- "\n",
39
- " .dataframe thead th {\n",
40
- " text-align: right;\n",
41
- " }\n",
42
- "</style>\n",
43
- "<table border=\"1\" class=\"dataframe\">\n",
44
- " <thead>\n",
45
- " <tr style=\"text-align: right;\">\n",
46
- " <th></th>\n",
47
- " <th>id</th>\n",
48
- " <th>title</th>\n",
49
- " <th>genre</th>\n",
50
- " <th>original_language</th>\n",
51
- " <th>overview</th>\n",
52
- " <th>popularity</th>\n",
53
- " <th>release_date</th>\n",
54
- " <th>vote_average</th>\n",
55
- " <th>vote_count</th>\n",
56
- " </tr>\n",
57
- " </thead>\n",
58
- " <tbody>\n",
59
- " <tr>\n",
60
- " <th>0</th>\n",
61
- " <td>278</td>\n",
62
- " <td>The Shawshank Redemption</td>\n",
63
- " <td>Drama,Crime</td>\n",
64
- " <td>en</td>\n",
65
- " <td>Framed in the 1940s for the double murder of h...</td>\n",
66
- " <td>94.075</td>\n",
67
- " <td>1994-09-23</td>\n",
68
- " <td>8.7</td>\n",
69
- " <td>21862</td>\n",
70
- " </tr>\n",
71
- " <tr>\n",
72
- " <th>1</th>\n",
73
- " <td>19404</td>\n",
74
- " <td>Dilwale Dulhania Le Jayenge</td>\n",
75
- " <td>Comedy,Drama,Romance</td>\n",
76
- " <td>hi</td>\n",
77
- " <td>Raj is a rich, carefree, happy-go-lucky second...</td>\n",
78
- " <td>25.408</td>\n",
79
- " <td>1995-10-19</td>\n",
80
- " <td>8.7</td>\n",
81
- " <td>3731</td>\n",
82
- " </tr>\n",
83
- " <tr>\n",
84
- " <th>2</th>\n",
85
- " <td>238</td>\n",
86
- " <td>The Godfather</td>\n",
87
- " <td>Drama,Crime</td>\n",
88
- " <td>en</td>\n",
89
- " <td>Spanning the years 1945 to 1955, a chronicle o...</td>\n",
90
- " <td>90.585</td>\n",
91
- " <td>1972-03-14</td>\n",
92
- " <td>8.7</td>\n",
93
- " <td>16280</td>\n",
94
- " </tr>\n",
95
- " <tr>\n",
96
- " <th>3</th>\n",
97
- " <td>424</td>\n",
98
- " <td>Schindler's List</td>\n",
99
- " <td>Drama,History,War</td>\n",
100
- " <td>en</td>\n",
101
- " <td>The true story of how businessman Oskar Schind...</td>\n",
102
- " <td>44.761</td>\n",
103
- " <td>1993-12-15</td>\n",
104
- " <td>8.6</td>\n",
105
- " <td>12959</td>\n",
106
- " </tr>\n",
107
- " <tr>\n",
108
- " <th>4</th>\n",
109
- " <td>240</td>\n",
110
- " <td>The Godfather: Part II</td>\n",
111
- " <td>Drama,Crime</td>\n",
112
- " <td>en</td>\n",
113
- " <td>In the continuing saga of the Corleone crime f...</td>\n",
114
- " <td>57.749</td>\n",
115
- " <td>1974-12-20</td>\n",
116
- " <td>8.6</td>\n",
117
- " <td>9811</td>\n",
118
- " </tr>\n",
119
- " <tr>\n",
120
- " <th>5</th>\n",
121
- " <td>667257</td>\n",
122
- " <td>Impossible Things</td>\n",
123
- " <td>Family,Drama</td>\n",
124
- " <td>es</td>\n",
125
- " <td>Matilde is a woman who, after the death of her...</td>\n",
126
- " <td>14.358</td>\n",
127
- " <td>2021-06-17</td>\n",
128
- " <td>8.6</td>\n",
129
- " <td>255</td>\n",
130
- " </tr>\n",
131
- " <tr>\n",
132
- " <th>6</th>\n",
133
- " <td>129</td>\n",
134
- " <td>Spirited Away</td>\n",
135
- " <td>Animation,Family,Fantasy</td>\n",
136
- " <td>ja</td>\n",
137
- " <td>A young girl, Chihiro, becomes trapped in a st...</td>\n",
138
- " <td>92.056</td>\n",
139
- " <td>2001-07-20</td>\n",
140
- " <td>8.5</td>\n",
141
- " <td>13093</td>\n",
142
- " </tr>\n",
143
- " <tr>\n",
144
- " <th>7</th>\n",
145
- " <td>730154</td>\n",
146
- " <td>Your Eyes Tell</td>\n",
147
- " <td>Romance,Drama</td>\n",
148
- " <td>ja</td>\n",
149
- " <td>A tragic accident lead to Kaori's blindness, b...</td>\n",
150
- " <td>51.345</td>\n",
151
- " <td>2020-10-23</td>\n",
152
- " <td>8.5</td>\n",
153
- " <td>339</td>\n",
154
- " </tr>\n",
155
- " <tr>\n",
156
- " <th>8</th>\n",
157
- " <td>372754</td>\n",
158
- " <td>Dou kyu sei – Classmates</td>\n",
159
- " <td>Romance,Animation</td>\n",
160
- " <td>ja</td>\n",
161
- " <td>Rihito Sajo, an honor student with a perfect s...</td>\n",
162
- " <td>14.285</td>\n",
163
- " <td>2016-02-20</td>\n",
164
- " <td>8.5</td>\n",
165
- " <td>239</td>\n",
166
- " </tr>\n",
167
- " <tr>\n",
168
- " <th>9</th>\n",
169
- " <td>372058</td>\n",
170
- " <td>Your Name.</td>\n",
171
- " <td>Romance,Animation,Drama</td>\n",
172
- " <td>ja</td>\n",
173
- " <td>High schoolers Mitsuha and Taki are complete s...</td>\n",
174
- " <td>158.270</td>\n",
175
- " <td>2016-08-26</td>\n",
176
- " <td>8.5</td>\n",
177
- " <td>8895</td>\n",
178
- " </tr>\n",
179
- " </tbody>\n",
180
- "</table>\n",
181
- "</div>"
182
- ],
183
- "text/plain": [
184
- " id title genre \\\n",
185
- "0 278 The Shawshank Redemption Drama,Crime \n",
186
- "1 19404 Dilwale Dulhania Le Jayenge Comedy,Drama,Romance \n",
187
- "2 238 The Godfather Drama,Crime \n",
188
- "3 424 Schindler's List Drama,History,War \n",
189
- "4 240 The Godfather: Part II Drama,Crime \n",
190
- "5 667257 Impossible Things Family,Drama \n",
191
- "6 129 Spirited Away Animation,Family,Fantasy \n",
192
- "7 730154 Your Eyes Tell Romance,Drama \n",
193
- "8 372754 Dou kyu sei – Classmates Romance,Animation \n",
194
- "9 372058 Your Name. Romance,Animation,Drama \n",
195
- "\n",
196
- " original_language overview \\\n",
197
- "0 en Framed in the 1940s for the double murder of h... \n",
198
- "1 hi Raj is a rich, carefree, happy-go-lucky second... \n",
199
- "2 en Spanning the years 1945 to 1955, a chronicle o... \n",
200
- "3 en The true story of how businessman Oskar Schind... \n",
201
- "4 en In the continuing saga of the Corleone crime f... \n",
202
- "5 es Matilde is a woman who, after the death of her... \n",
203
- "6 ja A young girl, Chihiro, becomes trapped in a st... \n",
204
- "7 ja A tragic accident lead to Kaori's blindness, b... \n",
205
- "8 ja Rihito Sajo, an honor student with a perfect s... \n",
206
- "9 ja High schoolers Mitsuha and Taki are complete s... \n",
207
- "\n",
208
- " popularity release_date vote_average vote_count \n",
209
- "0 94.075 1994-09-23 8.7 21862 \n",
210
- "1 25.408 1995-10-19 8.7 3731 \n",
211
- "2 90.585 1972-03-14 8.7 16280 \n",
212
- "3 44.761 1993-12-15 8.6 12959 \n",
213
- "4 57.749 1974-12-20 8.6 9811 \n",
214
- "5 14.358 2021-06-17 8.6 255 \n",
215
- "6 92.056 2001-07-20 8.5 13093 \n",
216
- "7 51.345 2020-10-23 8.5 339 \n",
217
- "8 14.285 2016-02-20 8.5 239 \n",
218
- "9 158.270 2016-08-26 8.5 8895 "
219
- ]
220
- },
221
- "execution_count": 3,
222
- "metadata": {},
223
- "output_type": "execute_result"
224
- }
225
- ],
226
- "source": [
227
- "movies.head(10)"
228
- ]
229
- },
230
- {
231
- "cell_type": "code",
232
- "execution_count": 4,
233
- "metadata": {},
234
- "outputs": [
235
- {
236
- "data": {
237
- "text/html": [
238
- "<div>\n",
239
- "<style scoped>\n",
240
- " .dataframe tbody tr th:only-of-type {\n",
241
- " vertical-align: middle;\n",
242
- " }\n",
243
- "\n",
244
- " .dataframe tbody tr th {\n",
245
- " vertical-align: top;\n",
246
- " }\n",
247
- "\n",
248
- " .dataframe thead th {\n",
249
- " text-align: right;\n",
250
- " }\n",
251
- "</style>\n",
252
- "<table border=\"1\" class=\"dataframe\">\n",
253
- " <thead>\n",
254
- " <tr style=\"text-align: right;\">\n",
255
- " <th></th>\n",
256
- " <th>id</th>\n",
257
- " <th>popularity</th>\n",
258
- " <th>vote_average</th>\n",
259
- " <th>vote_count</th>\n",
260
- " </tr>\n",
261
- " </thead>\n",
262
- " <tbody>\n",
263
- " <tr>\n",
264
- " <th>count</th>\n",
265
- " <td>10000.000000</td>\n",
266
- " <td>10000.000000</td>\n",
267
- " <td>10000.000000</td>\n",
268
- " <td>10000.000000</td>\n",
269
- " </tr>\n",
270
- " <tr>\n",
271
- " <th>mean</th>\n",
272
- " <td>161243.505000</td>\n",
273
- " <td>34.697267</td>\n",
274
- " <td>6.621150</td>\n",
275
- " <td>1547.309400</td>\n",
276
- " </tr>\n",
277
- " <tr>\n",
278
- " <th>std</th>\n",
279
- " <td>211422.046043</td>\n",
280
- " <td>211.684175</td>\n",
281
- " <td>0.766231</td>\n",
282
- " <td>2648.295789</td>\n",
283
- " </tr>\n",
284
- " <tr>\n",
285
- " <th>min</th>\n",
286
- " <td>5.000000</td>\n",
287
- " <td>0.600000</td>\n",
288
- " <td>4.600000</td>\n",
289
- " <td>200.000000</td>\n",
290
- " </tr>\n",
291
- " <tr>\n",
292
- " <th>25%</th>\n",
293
- " <td>10127.750000</td>\n",
294
- " <td>9.154750</td>\n",
295
- " <td>6.100000</td>\n",
296
- " <td>315.000000</td>\n",
297
- " </tr>\n",
298
- " <tr>\n",
299
- " <th>50%</th>\n",
300
- " <td>30002.500000</td>\n",
301
- " <td>13.637500</td>\n",
302
- " <td>6.600000</td>\n",
303
- " <td>583.500000</td>\n",
304
- " </tr>\n",
305
- " <tr>\n",
306
- " <th>75%</th>\n",
307
- " <td>310133.500000</td>\n",
308
- " <td>25.651250</td>\n",
309
- " <td>7.200000</td>\n",
310
- " <td>1460.000000</td>\n",
311
- " </tr>\n",
312
- " <tr>\n",
313
- " <th>max</th>\n",
314
- " <td>934761.000000</td>\n",
315
- " <td>10436.917000</td>\n",
316
- " <td>8.700000</td>\n",
317
- " <td>31917.000000</td>\n",
318
- " </tr>\n",
319
- " </tbody>\n",
320
- "</table>\n",
321
- "</div>"
322
- ],
323
- "text/plain": [
324
- " id popularity vote_average vote_count\n",
325
- "count 10000.000000 10000.000000 10000.000000 10000.000000\n",
326
- "mean 161243.505000 34.697267 6.621150 1547.309400\n",
327
- "std 211422.046043 211.684175 0.766231 2648.295789\n",
328
- "min 5.000000 0.600000 4.600000 200.000000\n",
329
- "25% 10127.750000 9.154750 6.100000 315.000000\n",
330
- "50% 30002.500000 13.637500 6.600000 583.500000\n",
331
- "75% 310133.500000 25.651250 7.200000 1460.000000\n",
332
- "max 934761.000000 10436.917000 8.700000 31917.000000"
333
- ]
334
- },
335
- "execution_count": 4,
336
- "metadata": {},
337
- "output_type": "execute_result"
338
- }
339
- ],
340
- "source": [
341
- "movies.describe()"
342
- ]
343
- },
344
- {
345
- "cell_type": "code",
346
- "execution_count": 5,
347
- "metadata": {},
348
- "outputs": [
349
- {
350
- "name": "stdout",
351
- "output_type": "stream",
352
- "text": [
353
- "<class 'pandas.core.frame.DataFrame'>\n",
354
- "RangeIndex: 10000 entries, 0 to 9999\n",
355
- "Data columns (total 9 columns):\n",
356
- " # Column Non-Null Count Dtype \n",
357
- "--- ------ -------------- ----- \n",
358
- " 0 id 10000 non-null int64 \n",
359
- " 1 title 10000 non-null object \n",
360
- " 2 genre 9997 non-null object \n",
361
- " 3 original_language 10000 non-null object \n",
362
- " 4 overview 9987 non-null object \n",
363
- " 5 popularity 10000 non-null float64\n",
364
- " 6 release_date 10000 non-null object \n",
365
- " 7 vote_average 10000 non-null float64\n",
366
- " 8 vote_count 10000 non-null int64 \n",
367
- "dtypes: float64(2), int64(2), object(5)\n",
368
- "memory usage: 703.2+ KB\n"
369
- ]
370
- }
371
- ],
372
- "source": [
373
- "movies.info()"
374
- ]
375
- },
376
- {
377
- "cell_type": "code",
378
- "execution_count": 6,
379
- "metadata": {},
380
- "outputs": [
381
- {
382
- "data": {
383
- "text/plain": [
384
- "Index(['id', 'title', 'genre', 'original_language', 'overview', 'popularity',\n",
385
- " 'release_date', 'vote_average', 'vote_count'],\n",
386
- " dtype='object')"
387
- ]
388
- },
389
- "execution_count": 6,
390
- "metadata": {},
391
- "output_type": "execute_result"
392
- }
393
- ],
394
- "source": [
395
- "movies.columns"
396
- ]
397
- },
398
- {
399
- "cell_type": "code",
400
- "execution_count": 36,
401
- "metadata": {},
402
- "outputs": [],
403
- "source": [
404
- "movies=movies[['id','title','overview','genre']]"
405
- ]
406
- },
407
- {
408
- "cell_type": "code",
409
- "execution_count": 37,
410
- "metadata": {},
411
- "outputs": [],
412
- "source": [
413
- "movies['tags']=movies['overview']+movies['genre']"
414
- ]
415
- },
416
- {
417
- "cell_type": "code",
418
- "execution_count": 38,
419
- "metadata": {},
420
- "outputs": [],
421
- "source": [
422
- "new_movies = movies.drop(columns=['overview','genre'])"
423
- ]
424
- },
425
- {
426
- "cell_type": "code",
427
- "execution_count": 39,
428
- "metadata": {},
429
- "outputs": [
430
- {
431
- "data": {
432
- "text/html": [
433
- "<div>\n",
434
- "<style scoped>\n",
435
- " .dataframe tbody tr th:only-of-type {\n",
436
- " vertical-align: middle;\n",
437
- " }\n",
438
- "\n",
439
- " .dataframe tbody tr th {\n",
440
- " vertical-align: top;\n",
441
- " }\n",
442
- "\n",
443
- " .dataframe thead th {\n",
444
- " text-align: right;\n",
445
- " }\n",
446
- "</style>\n",
447
- "<table border=\"1\" class=\"dataframe\">\n",
448
- " <thead>\n",
449
- " <tr style=\"text-align: right;\">\n",
450
- " <th></th>\n",
451
- " <th>id</th>\n",
452
- " <th>title</th>\n",
453
- " <th>tags</th>\n",
454
- " </tr>\n",
455
- " </thead>\n",
456
- " <tbody>\n",
457
- " <tr>\n",
458
- " <th>0</th>\n",
459
- " <td>278</td>\n",
460
- " <td>The Shawshank Redemption</td>\n",
461
- " <td>Framed in the 1940s for the double murder of h...</td>\n",
462
- " </tr>\n",
463
- " <tr>\n",
464
- " <th>1</th>\n",
465
- " <td>19404</td>\n",
466
- " <td>Dilwale Dulhania Le Jayenge</td>\n",
467
- " <td>Raj is a rich, carefree, happy-go-lucky second...</td>\n",
468
- " </tr>\n",
469
- " <tr>\n",
470
- " <th>2</th>\n",
471
- " <td>238</td>\n",
472
- " <td>The Godfather</td>\n",
473
- " <td>Spanning the years 1945 to 1955, a chronicle o...</td>\n",
474
- " </tr>\n",
475
- " <tr>\n",
476
- " <th>3</th>\n",
477
- " <td>424</td>\n",
478
- " <td>Schindler's List</td>\n",
479
- " <td>The true story of how businessman Oskar Schind...</td>\n",
480
- " </tr>\n",
481
- " <tr>\n",
482
- " <th>4</th>\n",
483
- " <td>240</td>\n",
484
- " <td>The Godfather: Part II</td>\n",
485
- " <td>In the continuing saga of the Corleone crime f...</td>\n",
486
- " </tr>\n",
487
- " <tr>\n",
488
- " <th>...</th>\n",
489
- " <td>...</td>\n",
490
- " <td>...</td>\n",
491
- " <td>...</td>\n",
492
- " </tr>\n",
493
- " <tr>\n",
494
- " <th>9995</th>\n",
495
- " <td>10196</td>\n",
496
- " <td>The Last Airbender</td>\n",
497
- " <td>The story follows the adventures of Aang, a yo...</td>\n",
498
- " </tr>\n",
499
- " <tr>\n",
500
- " <th>9996</th>\n",
501
- " <td>331446</td>\n",
502
- " <td>Sharknado 3: Oh Hell No!</td>\n",
503
- " <td>The sharks take bite out of the East Coast whe...</td>\n",
504
- " </tr>\n",
505
- " <tr>\n",
506
- " <th>9997</th>\n",
507
- " <td>13995</td>\n",
508
- " <td>Captain America</td>\n",
509
- " <td>During World War II, a brave, patriotic Americ...</td>\n",
510
- " </tr>\n",
511
- " <tr>\n",
512
- " <th>9998</th>\n",
513
- " <td>2312</td>\n",
514
- " <td>In the Name of the King: A Dungeon Siege Tale</td>\n",
515
- " <td>A man named Farmer sets out to rescue his kidn...</td>\n",
516
- " </tr>\n",
517
- " <tr>\n",
518
- " <th>9999</th>\n",
519
- " <td>455957</td>\n",
520
- " <td>Domino</td>\n",
521
- " <td>Seeking justice for his partner’s murder by an...</td>\n",
522
- " </tr>\n",
523
- " </tbody>\n",
524
- "</table>\n",
525
- "<p>10000 rows × 3 columns</p>\n",
526
- "</div>"
527
- ],
528
- "text/plain": [
529
- " id title \\\n",
530
- "0 278 The Shawshank Redemption \n",
531
- "1 19404 Dilwale Dulhania Le Jayenge \n",
532
- "2 238 The Godfather \n",
533
- "3 424 Schindler's List \n",
534
- "4 240 The Godfather: Part II \n",
535
- "... ... ... \n",
536
- "9995 10196 The Last Airbender \n",
537
- "9996 331446 Sharknado 3: Oh Hell No! \n",
538
- "9997 13995 Captain America \n",
539
- "9998 2312 In the Name of the King: A Dungeon Siege Tale \n",
540
- "9999 455957 Domino \n",
541
- "\n",
542
- " tags \n",
543
- "0 Framed in the 1940s for the double murder of h... \n",
544
- "1 Raj is a rich, carefree, happy-go-lucky second... \n",
545
- "2 Spanning the years 1945 to 1955, a chronicle o... \n",
546
- "3 The true story of how businessman Oskar Schind... \n",
547
- "4 In the continuing saga of the Corleone crime f... \n",
548
- "... ... \n",
549
- "9995 The story follows the adventures of Aang, a yo... \n",
550
- "9996 The sharks take bite out of the East Coast whe... \n",
551
- "9997 During World War II, a brave, patriotic Americ... \n",
552
- "9998 A man named Farmer sets out to rescue his kidn... \n",
553
- "9999 Seeking justice for his partner’s murder by an... \n",
554
- "\n",
555
- "[10000 rows x 3 columns]"
556
- ]
557
- },
558
- "execution_count": 39,
559
- "metadata": {},
560
- "output_type": "execute_result"
561
- }
562
- ],
563
- "source": [
564
- "new_movies"
565
- ]
566
- },
567
- {
568
- "cell_type": "code",
569
- "execution_count": 40,
570
- "metadata": {},
571
- "outputs": [
572
- {
573
- "name": "stdout",
574
- "output_type": "stream",
575
- "text": [
576
- "Requirement already satisfied: scikit-learn in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (1.6.0)\n",
577
- "Requirement already satisfied: numpy>=1.19.5 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (2.2.1)\n",
578
- "Requirement already satisfied: scipy>=1.6.0 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (1.14.1)\n",
579
- "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (1.4.2)\n",
580
- "Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (3.5.0)\n",
581
- "Note: you may need to restart the kernel to use updated packages.\n"
582
- ]
583
- }
584
- ],
585
- "source": [
586
- "# Install scikit-learn package\n",
587
- "%pip install scikit-learn\n",
588
- "\n",
589
- "from sklearn.feature_extraction.text import CountVectorizer"
590
- ]
591
- },
592
- {
593
- "cell_type": "code",
594
- "execution_count": 41,
595
- "metadata": {},
596
- "outputs": [],
597
- "source": [
598
- "cv=CountVectorizer(max_features=10000 , stop_words='english')"
599
- ]
600
- },
601
- {
602
- "cell_type": "code",
603
- "execution_count": 42,
604
- "metadata": {},
605
- "outputs": [
606
- {
607
- "data": {
608
- "text/html": [
609
- "<style>#sk-container-id-2 {\n",
610
- " /* Definition of color scheme common for light and dark mode */\n",
611
- " --sklearn-color-text: #000;\n",
612
- " --sklearn-color-text-muted: #666;\n",
613
- " --sklearn-color-line: gray;\n",
614
- " /* Definition of color scheme for unfitted estimators */\n",
615
- " --sklearn-color-unfitted-level-0: #fff5e6;\n",
616
- " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
617
- " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
618
- " --sklearn-color-unfitted-level-3: chocolate;\n",
619
- " /* Definition of color scheme for fitted estimators */\n",
620
- " --sklearn-color-fitted-level-0: #f0f8ff;\n",
621
- " --sklearn-color-fitted-level-1: #d4ebff;\n",
622
- " --sklearn-color-fitted-level-2: #b3dbfd;\n",
623
- " --sklearn-color-fitted-level-3: cornflowerblue;\n",
624
- "\n",
625
- " /* Specific color for light theme */\n",
626
- " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
627
- " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
628
- " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
629
- " --sklearn-color-icon: #696969;\n",
630
- "\n",
631
- " @media (prefers-color-scheme: dark) {\n",
632
- " /* Redefinition of color scheme for dark theme */\n",
633
- " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
634
- " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
635
- " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
636
- " --sklearn-color-icon: #878787;\n",
637
- " }\n",
638
- "}\n",
639
- "\n",
640
- "#sk-container-id-2 {\n",
641
- " color: var(--sklearn-color-text);\n",
642
- "}\n",
643
- "\n",
644
- "#sk-container-id-2 pre {\n",
645
- " padding: 0;\n",
646
- "}\n",
647
- "\n",
648
- "#sk-container-id-2 input.sk-hidden--visually {\n",
649
- " border: 0;\n",
650
- " clip: rect(1px 1px 1px 1px);\n",
651
- " clip: rect(1px, 1px, 1px, 1px);\n",
652
- " height: 1px;\n",
653
- " margin: -1px;\n",
654
- " overflow: hidden;\n",
655
- " padding: 0;\n",
656
- " position: absolute;\n",
657
- " width: 1px;\n",
658
- "}\n",
659
- "\n",
660
- "#sk-container-id-2 div.sk-dashed-wrapped {\n",
661
- " border: 1px dashed var(--sklearn-color-line);\n",
662
- " margin: 0 0.4em 0.5em 0.4em;\n",
663
- " box-sizing: border-box;\n",
664
- " padding-bottom: 0.4em;\n",
665
- " background-color: var(--sklearn-color-background);\n",
666
- "}\n",
667
- "\n",
668
- "#sk-container-id-2 div.sk-container {\n",
669
- " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
670
- " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
671
- " so we also need the `!important` here to be able to override the\n",
672
- " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
673
- " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
674
- " display: inline-block !important;\n",
675
- " position: relative;\n",
676
- "}\n",
677
- "\n",
678
- "#sk-container-id-2 div.sk-text-repr-fallback {\n",
679
- " display: none;\n",
680
- "}\n",
681
- "\n",
682
- "div.sk-parallel-item,\n",
683
- "div.sk-serial,\n",
684
- "div.sk-item {\n",
685
- " /* draw centered vertical line to link estimators */\n",
686
- " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
687
- " background-size: 2px 100%;\n",
688
- " background-repeat: no-repeat;\n",
689
- " background-position: center center;\n",
690
- "}\n",
691
- "\n",
692
- "/* Parallel-specific style estimator block */\n",
693
- "\n",
694
- "#sk-container-id-2 div.sk-parallel-item::after {\n",
695
- " content: \"\";\n",
696
- " width: 100%;\n",
697
- " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
698
- " flex-grow: 1;\n",
699
- "}\n",
700
- "\n",
701
- "#sk-container-id-2 div.sk-parallel {\n",
702
- " display: flex;\n",
703
- " align-items: stretch;\n",
704
- " justify-content: center;\n",
705
- " background-color: var(--sklearn-color-background);\n",
706
- " position: relative;\n",
707
- "}\n",
708
- "\n",
709
- "#sk-container-id-2 div.sk-parallel-item {\n",
710
- " display: flex;\n",
711
- " flex-direction: column;\n",
712
- "}\n",
713
- "\n",
714
- "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
715
- " align-self: flex-end;\n",
716
- " width: 50%;\n",
717
- "}\n",
718
- "\n",
719
- "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
720
- " align-self: flex-start;\n",
721
- " width: 50%;\n",
722
- "}\n",
723
- "\n",
724
- "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
725
- " width: 0;\n",
726
- "}\n",
727
- "\n",
728
- "/* Serial-specific style estimator block */\n",
729
- "\n",
730
- "#sk-container-id-2 div.sk-serial {\n",
731
- " display: flex;\n",
732
- " flex-direction: column;\n",
733
- " align-items: center;\n",
734
- " background-color: var(--sklearn-color-background);\n",
735
- " padding-right: 1em;\n",
736
- " padding-left: 1em;\n",
737
- "}\n",
738
- "\n",
739
- "\n",
740
- "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
741
- "clickable and can be expanded/collapsed.\n",
742
- "- Pipeline and ColumnTransformer use this feature and define the default style\n",
743
- "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
744
- "*/\n",
745
- "\n",
746
- "/* Pipeline and ColumnTransformer style (default) */\n",
747
- "\n",
748
- "#sk-container-id-2 div.sk-toggleable {\n",
749
- " /* Default theme specific background. It is overwritten whether we have a\n",
750
- " specific estimator or a Pipeline/ColumnTransformer */\n",
751
- " background-color: var(--sklearn-color-background);\n",
752
- "}\n",
753
- "\n",
754
- "/* Toggleable label */\n",
755
- "#sk-container-id-2 label.sk-toggleable__label {\n",
756
- " cursor: pointer;\n",
757
- " display: flex;\n",
758
- " width: 100%;\n",
759
- " margin-bottom: 0;\n",
760
- " padding: 0.5em;\n",
761
- " box-sizing: border-box;\n",
762
- " text-align: center;\n",
763
- " align-items: start;\n",
764
- " justify-content: space-between;\n",
765
- " gap: 0.5em;\n",
766
- "}\n",
767
- "\n",
768
- "#sk-container-id-2 label.sk-toggleable__label .caption {\n",
769
- " font-size: 0.6rem;\n",
770
- " font-weight: lighter;\n",
771
- " color: var(--sklearn-color-text-muted);\n",
772
- "}\n",
773
- "\n",
774
- "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
775
- " /* Arrow on the left of the label */\n",
776
- " content: \"▸\";\n",
777
- " float: left;\n",
778
- " margin-right: 0.25em;\n",
779
- " color: var(--sklearn-color-icon);\n",
780
- "}\n",
781
- "\n",
782
- "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
783
- " color: var(--sklearn-color-text);\n",
784
- "}\n",
785
- "\n",
786
- "/* Toggleable content - dropdown */\n",
787
- "\n",
788
- "#sk-container-id-2 div.sk-toggleable__content {\n",
789
- " max-height: 0;\n",
790
- " max-width: 0;\n",
791
- " overflow: hidden;\n",
792
- " text-align: left;\n",
793
- " /* unfitted */\n",
794
- " background-color: var(--sklearn-color-unfitted-level-0);\n",
795
- "}\n",
796
- "\n",
797
- "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
798
- " /* fitted */\n",
799
- " background-color: var(--sklearn-color-fitted-level-0);\n",
800
- "}\n",
801
- "\n",
802
- "#sk-container-id-2 div.sk-toggleable__content pre {\n",
803
- " margin: 0.2em;\n",
804
- " border-radius: 0.25em;\n",
805
- " color: var(--sklearn-color-text);\n",
806
- " /* unfitted */\n",
807
- " background-color: var(--sklearn-color-unfitted-level-0);\n",
808
- "}\n",
809
- "\n",
810
- "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
811
- " /* unfitted */\n",
812
- " background-color: var(--sklearn-color-fitted-level-0);\n",
813
- "}\n",
814
- "\n",
815
- "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
816
- " /* Expand drop-down */\n",
817
- " max-height: 200px;\n",
818
- " max-width: 100%;\n",
819
- " overflow: auto;\n",
820
- "}\n",
821
- "\n",
822
- "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
823
- " content: \"▾\";\n",
824
- "}\n",
825
- "\n",
826
- "/* Pipeline/ColumnTransformer-specific style */\n",
827
- "\n",
828
- "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
829
- " color: var(--sklearn-color-text);\n",
830
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
831
- "}\n",
832
- "\n",
833
- "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
834
- " background-color: var(--sklearn-color-fitted-level-2);\n",
835
- "}\n",
836
- "\n",
837
- "/* Estimator-specific style */\n",
838
- "\n",
839
- "/* Colorize estimator box */\n",
840
- "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
841
- " /* unfitted */\n",
842
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
843
- "}\n",
844
- "\n",
845
- "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
846
- " /* fitted */\n",
847
- " background-color: var(--sklearn-color-fitted-level-2);\n",
848
- "}\n",
849
- "\n",
850
- "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
851
- "#sk-container-id-2 div.sk-label label {\n",
852
- " /* The background is the default theme color */\n",
853
- " color: var(--sklearn-color-text-on-default-background);\n",
854
- "}\n",
855
- "\n",
856
- "/* On hover, darken the color of the background */\n",
857
- "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
858
- " color: var(--sklearn-color-text);\n",
859
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
860
- "}\n",
861
- "\n",
862
- "/* Label box, darken color on hover, fitted */\n",
863
- "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
864
- " color: var(--sklearn-color-text);\n",
865
- " background-color: var(--sklearn-color-fitted-level-2);\n",
866
- "}\n",
867
- "\n",
868
- "/* Estimator label */\n",
869
- "\n",
870
- "#sk-container-id-2 div.sk-label label {\n",
871
- " font-family: monospace;\n",
872
- " font-weight: bold;\n",
873
- " display: inline-block;\n",
874
- " line-height: 1.2em;\n",
875
- "}\n",
876
- "\n",
877
- "#sk-container-id-2 div.sk-label-container {\n",
878
- " text-align: center;\n",
879
- "}\n",
880
- "\n",
881
- "/* Estimator-specific */\n",
882
- "#sk-container-id-2 div.sk-estimator {\n",
883
- " font-family: monospace;\n",
884
- " border: 1px dotted var(--sklearn-color-border-box);\n",
885
- " border-radius: 0.25em;\n",
886
- " box-sizing: border-box;\n",
887
- " margin-bottom: 0.5em;\n",
888
- " /* unfitted */\n",
889
- " background-color: var(--sklearn-color-unfitted-level-0);\n",
890
- "}\n",
891
- "\n",
892
- "#sk-container-id-2 div.sk-estimator.fitted {\n",
893
- " /* fitted */\n",
894
- " background-color: var(--sklearn-color-fitted-level-0);\n",
895
- "}\n",
896
- "\n",
897
- "/* on hover */\n",
898
- "#sk-container-id-2 div.sk-estimator:hover {\n",
899
- " /* unfitted */\n",
900
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
901
- "}\n",
902
- "\n",
903
- "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
904
- " /* fitted */\n",
905
- " background-color: var(--sklearn-color-fitted-level-2);\n",
906
- "}\n",
907
- "\n",
908
- "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
909
- "\n",
910
- "/* Common style for \"i\" and \"?\" */\n",
911
- "\n",
912
- ".sk-estimator-doc-link,\n",
913
- "a:link.sk-estimator-doc-link,\n",
914
- "a:visited.sk-estimator-doc-link {\n",
915
- " float: right;\n",
916
- " font-size: smaller;\n",
917
- " line-height: 1em;\n",
918
- " font-family: monospace;\n",
919
- " background-color: var(--sklearn-color-background);\n",
920
- " border-radius: 1em;\n",
921
- " height: 1em;\n",
922
- " width: 1em;\n",
923
- " text-decoration: none !important;\n",
924
- " margin-left: 0.5em;\n",
925
- " text-align: center;\n",
926
- " /* unfitted */\n",
927
- " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
928
- " color: var(--sklearn-color-unfitted-level-1);\n",
929
- "}\n",
930
- "\n",
931
- ".sk-estimator-doc-link.fitted,\n",
932
- "a:link.sk-estimator-doc-link.fitted,\n",
933
- "a:visited.sk-estimator-doc-link.fitted {\n",
934
- " /* fitted */\n",
935
- " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
936
- " color: var(--sklearn-color-fitted-level-1);\n",
937
- "}\n",
938
- "\n",
939
- "/* On hover */\n",
940
- "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
941
- ".sk-estimator-doc-link:hover,\n",
942
- "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
943
- ".sk-estimator-doc-link:hover {\n",
944
- " /* unfitted */\n",
945
- " background-color: var(--sklearn-color-unfitted-level-3);\n",
946
- " color: var(--sklearn-color-background);\n",
947
- " text-decoration: none;\n",
948
- "}\n",
949
- "\n",
950
- "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
951
- ".sk-estimator-doc-link.fitted:hover,\n",
952
- "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
953
- ".sk-estimator-doc-link.fitted:hover {\n",
954
- " /* fitted */\n",
955
- " background-color: var(--sklearn-color-fitted-level-3);\n",
956
- " color: var(--sklearn-color-background);\n",
957
- " text-decoration: none;\n",
958
- "}\n",
959
- "\n",
960
- "/* Span, style for the box shown on hovering the info icon */\n",
961
- ".sk-estimator-doc-link span {\n",
962
- " display: none;\n",
963
- " z-index: 9999;\n",
964
- " position: relative;\n",
965
- " font-weight: normal;\n",
966
- " right: .2ex;\n",
967
- " padding: .5ex;\n",
968
- " margin: .5ex;\n",
969
- " width: min-content;\n",
970
- " min-width: 20ex;\n",
971
- " max-width: 50ex;\n",
972
- " color: var(--sklearn-color-text);\n",
973
- " box-shadow: 2pt 2pt 4pt #999;\n",
974
- " /* unfitted */\n",
975
- " background: var(--sklearn-color-unfitted-level-0);\n",
976
- " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
977
- "}\n",
978
- "\n",
979
- ".sk-estimator-doc-link.fitted span {\n",
980
- " /* fitted */\n",
981
- " background: var(--sklearn-color-fitted-level-0);\n",
982
- " border: var(--sklearn-color-fitted-level-3);\n",
983
- "}\n",
984
- "\n",
985
- ".sk-estimator-doc-link:hover span {\n",
986
- " display: block;\n",
987
- "}\n",
988
- "\n",
989
- "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
990
- "\n",
991
- "#sk-container-id-2 a.estimator_doc_link {\n",
992
- " float: right;\n",
993
- " font-size: 1rem;\n",
994
- " line-height: 1em;\n",
995
- " font-family: monospace;\n",
996
- " background-color: var(--sklearn-color-background);\n",
997
- " border-radius: 1rem;\n",
998
- " height: 1rem;\n",
999
- " width: 1rem;\n",
1000
- " text-decoration: none;\n",
1001
- " /* unfitted */\n",
1002
- " color: var(--sklearn-color-unfitted-level-1);\n",
1003
- " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
1004
- "}\n",
1005
- "\n",
1006
- "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
1007
- " /* fitted */\n",
1008
- " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
1009
- " color: var(--sklearn-color-fitted-level-1);\n",
1010
- "}\n",
1011
- "\n",
1012
- "/* On hover */\n",
1013
- "#sk-container-id-2 a.estimator_doc_link:hover {\n",
1014
- " /* unfitted */\n",
1015
- " background-color: var(--sklearn-color-unfitted-level-3);\n",
1016
- " color: var(--sklearn-color-background);\n",
1017
- " text-decoration: none;\n",
1018
- "}\n",
1019
- "\n",
1020
- "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
1021
- " /* fitted */\n",
1022
- " background-color: var(--sklearn-color-fitted-level-3);\n",
1023
- "}\n",
1024
- "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>CountVectorizer(max_features=10000, stop_words=&#x27;english&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\"><div><div>CountVectorizer</div></div><div><a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html\">?<span>Documentation for CountVectorizer</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></div></label><div class=\"sk-toggleable__content \"><pre>CountVectorizer(max_features=10000, stop_words=&#x27;english&#x27;)</pre></div> </div></div></div></div>"
1025
- ],
1026
- "text/plain": [
1027
- "CountVectorizer(max_features=10000, stop_words='english')"
1028
- ]
1029
- },
1030
- "execution_count": 42,
1031
- "metadata": {},
1032
- "output_type": "execute_result"
1033
- }
1034
- ],
1035
- "source": [
1036
- "cv"
1037
- ]
1038
- },
1039
- {
1040
- "cell_type": "code",
1041
- "execution_count": 43,
1042
- "metadata": {},
1043
- "outputs": [],
1044
- "source": [
1045
- "vector = cv.fit_transform(new_movies['tags'].values.astype('U')).toarray()"
1046
- ]
1047
- },
1048
- {
1049
- "cell_type": "code",
1050
- "execution_count": 44,
1051
- "metadata": {},
1052
- "outputs": [
1053
- {
1054
- "data": {
1055
- "text/plain": [
1056
- "(10000, 10000)"
1057
- ]
1058
- },
1059
- "execution_count": 44,
1060
- "metadata": {},
1061
- "output_type": "execute_result"
1062
- }
1063
- ],
1064
- "source": [
1065
- "vector.shape"
1066
- ]
1067
- },
1068
- {
1069
- "cell_type": "code",
1070
- "execution_count": 45,
1071
- "metadata": {},
1072
- "outputs": [],
1073
- "source": [
1074
- "from sklearn.metrics.pairwise import cosine_similarity"
1075
- ]
1076
- },
1077
- {
1078
- "cell_type": "code",
1079
- "execution_count": 46,
1080
- "metadata": {},
1081
- "outputs": [],
1082
- "source": [
1083
- "similarity = cosine_similarity(vector)"
1084
- ]
1085
- },
1086
- {
1087
- "cell_type": "code",
1088
- "execution_count": 47,
1089
- "metadata": {},
1090
- "outputs": [
1091
- {
1092
- "data": {
1093
- "text/plain": [
1094
- "array([[1. , 0.05634362, 0.13041013, ..., 0.07559289, 0.11065667,\n",
1095
- " 0.06900656],\n",
1096
- " [0.05634362, 1. , 0.07715167, ..., 0. , 0.03636965,\n",
1097
- " 0. ],\n",
1098
- " [0.13041013, 0.07715167, 1. , ..., 0.02300219, 0.0673435 ,\n",
1099
- " 0.09449112],\n",
1100
- " ...,\n",
1101
- " [0.07559289, 0. , 0.02300219, ..., 1. , 0.03253 ,\n",
1102
- " 0.03042903],\n",
1103
- " [0.11065667, 0.03636965, 0.0673435 , ..., 0.03253 , 1. ,\n",
1104
- " 0.04454354],\n",
1105
- " [0.06900656, 0. , 0.09449112, ..., 0.03042903, 0.04454354,\n",
1106
- " 1. ]], shape=(10000, 10000))"
1107
- ]
1108
- },
1109
- "execution_count": 47,
1110
- "metadata": {},
1111
- "output_type": "execute_result"
1112
- }
1113
- ],
1114
- "source": [
1115
- "similarity"
1116
- ]
1117
- },
1118
- {
1119
- "cell_type": "code",
1120
- "execution_count": 48,
1121
- "metadata": {},
1122
- "outputs": [
1123
- {
1124
- "data": {
1125
- "text/plain": [
1126
- "np.int64(2)"
1127
- ]
1128
- },
1129
- "execution_count": 48,
1130
- "metadata": {},
1131
- "output_type": "execute_result"
1132
- }
1133
- ],
1134
- "source": [
1135
- "new_movies[new_movies['title']==\"The Godfather\"].index[0]"
1136
- ]
1137
- },
1138
- {
1139
- "cell_type": "code",
1140
- "execution_count": 49,
1141
- "metadata": {},
1142
- "outputs": [
1143
- {
1144
- "name": "stdout",
1145
- "output_type": "stream",
1146
- "text": [
1147
- "The Godfather\n",
1148
- "The Godfather: Part II\n",
1149
- "Blood Ties\n",
1150
- "Joker\n",
1151
- "Bomb City\n"
1152
- ]
1153
- }
1154
- ],
1155
- "source": [
1156
- "distance = sorted(list(enumerate(similarity[2])) , reverse=True, key=lambda vector:vector[1])\n",
1157
- "for i in distance[0:5]:\n",
1158
- " print(new_movies.iloc[i[0]].title)"
1159
- ]
1160
- },
1161
- {
1162
- "cell_type": "code",
1163
- "execution_count": 50,
1164
- "metadata": {},
1165
- "outputs": [],
1166
- "source": [
1167
- "def recommend(movies):\n",
1168
- " index=new_movies[new_movies['title']==movies].index[0]\n",
1169
- " distance = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda vector:vector[1])\n",
1170
- " for i in distance[0:5]:\n",
1171
- " print(new_movies.iloc[i[0]].title)"
1172
- ]
1173
- },
1174
- {
1175
- "cell_type": "code",
1176
- "execution_count": 51,
1177
- "metadata": {},
1178
- "outputs": [
1179
- {
1180
- "name": "stdout",
1181
- "output_type": "stream",
1182
- "text": [
1183
- "Iron Man\n",
1184
- "Iron Man 3\n",
1185
- "Guardians of the Galaxy Vol. 2\n",
1186
- "Avengers: Age of Ultron\n",
1187
- "Star Wars: Episode III - Revenge of the Sith\n"
1188
- ]
1189
- }
1190
- ],
1191
- "source": [
1192
- "recommend(\"Iron Man\")"
1193
- ]
1194
- },
1195
- {
1196
- "cell_type": "code",
1197
- "execution_count": 52,
1198
- "metadata": {},
1199
- "outputs": [],
1200
- "source": [
1201
- "import pickle"
1202
- ]
1203
- },
1204
- {
1205
- "cell_type": "code",
1206
- "execution_count": 53,
1207
- "metadata": {},
1208
- "outputs": [],
1209
- "source": [
1210
- "pickle.dump(new_movies , open('movies_list.pkl' , 'wb'))"
1211
- ]
1212
- },
1213
- {
1214
- "cell_type": "code",
1215
- "execution_count": 54,
1216
- "metadata": {},
1217
- "outputs": [],
1218
- "source": [
1219
- "pickle.dump(similarity,open('similarity.pkl','wb'))"
1220
- ]
1221
- },
1222
- {
1223
- "cell_type": "code",
1224
- "execution_count": 55,
1225
- "metadata": {},
1226
- "outputs": [
1227
- {
1228
- "data": {
1229
- "text/html": [
1230
- "<div>\n",
1231
- "<style scoped>\n",
1232
- " .dataframe tbody tr th:only-of-type {\n",
1233
- " vertical-align: middle;\n",
1234
- " }\n",
1235
- "\n",
1236
- " .dataframe tbody tr th {\n",
1237
- " vertical-align: top;\n",
1238
- " }\n",
1239
- "\n",
1240
- " .dataframe thead th {\n",
1241
- " text-align: right;\n",
1242
- " }\n",
1243
- "</style>\n",
1244
- "<table border=\"1\" class=\"dataframe\">\n",
1245
- " <thead>\n",
1246
- " <tr style=\"text-align: right;\">\n",
1247
- " <th></th>\n",
1248
- " <th>id</th>\n",
1249
- " <th>title</th>\n",
1250
- " <th>tags</th>\n",
1251
- " </tr>\n",
1252
- " </thead>\n",
1253
- " <tbody>\n",
1254
- " <tr>\n",
1255
- " <th>0</th>\n",
1256
- " <td>278</td>\n",
1257
- " <td>The Shawshank Redemption</td>\n",
1258
- " <td>Framed in the 1940s for the double murder of h...</td>\n",
1259
- " </tr>\n",
1260
- " <tr>\n",
1261
- " <th>1</th>\n",
1262
- " <td>19404</td>\n",
1263
- " <td>Dilwale Dulhania Le Jayenge</td>\n",
1264
- " <td>Raj is a rich, carefree, happy-go-lucky second...</td>\n",
1265
- " </tr>\n",
1266
- " <tr>\n",
1267
- " <th>2</th>\n",
1268
- " <td>238</td>\n",
1269
- " <td>The Godfather</td>\n",
1270
- " <td>Spanning the years 1945 to 1955, a chronicle o...</td>\n",
1271
- " </tr>\n",
1272
- " <tr>\n",
1273
- " <th>3</th>\n",
1274
- " <td>424</td>\n",
1275
- " <td>Schindler's List</td>\n",
1276
- " <td>The true story of how businessman Oskar Schind...</td>\n",
1277
- " </tr>\n",
1278
- " <tr>\n",
1279
- " <th>4</th>\n",
1280
- " <td>240</td>\n",
1281
- " <td>The Godfather: Part II</td>\n",
1282
- " <td>In the continuing saga of the Corleone crime f...</td>\n",
1283
- " </tr>\n",
1284
- " <tr>\n",
1285
- " <th>...</th>\n",
1286
- " <td>...</td>\n",
1287
- " <td>...</td>\n",
1288
- " <td>...</td>\n",
1289
- " </tr>\n",
1290
- " <tr>\n",
1291
- " <th>9995</th>\n",
1292
- " <td>10196</td>\n",
1293
- " <td>The Last Airbender</td>\n",
1294
- " <td>The story follows the adventures of Aang, a yo...</td>\n",
1295
- " </tr>\n",
1296
- " <tr>\n",
1297
- " <th>9996</th>\n",
1298
- " <td>331446</td>\n",
1299
- " <td>Sharknado 3: Oh Hell No!</td>\n",
1300
- " <td>The sharks take bite out of the East Coast whe...</td>\n",
1301
- " </tr>\n",
1302
- " <tr>\n",
1303
- " <th>9997</th>\n",
1304
- " <td>13995</td>\n",
1305
- " <td>Captain America</td>\n",
1306
- " <td>During World War II, a brave, patriotic Americ...</td>\n",
1307
- " </tr>\n",
1308
- " <tr>\n",
1309
- " <th>9998</th>\n",
1310
- " <td>2312</td>\n",
1311
- " <td>In the Name of the King: A Dungeon Siege Tale</td>\n",
1312
- " <td>A man named Farmer sets out to rescue his kidn...</td>\n",
1313
- " </tr>\n",
1314
- " <tr>\n",
1315
- " <th>9999</th>\n",
1316
- " <td>455957</td>\n",
1317
- " <td>Domino</td>\n",
1318
- " <td>Seeking justice for his partner’s murder by an...</td>\n",
1319
- " </tr>\n",
1320
- " </tbody>\n",
1321
- "</table>\n",
1322
- "<p>10000 rows × 3 columns</p>\n",
1323
- "</div>"
1324
- ],
1325
- "text/plain": [
1326
- " id title \\\n",
1327
- "0 278 The Shawshank Redemption \n",
1328
- "1 19404 Dilwale Dulhania Le Jayenge \n",
1329
- "2 238 The Godfather \n",
1330
- "3 424 Schindler's List \n",
1331
- "4 240 The Godfather: Part II \n",
1332
- "... ... ... \n",
1333
- "9995 10196 The Last Airbender \n",
1334
- "9996 331446 Sharknado 3: Oh Hell No! \n",
1335
- "9997 13995 Captain America \n",
1336
- "9998 2312 In the Name of the King: A Dungeon Siege Tale \n",
1337
- "9999 455957 Domino \n",
1338
- "\n",
1339
- " tags \n",
1340
- "0 Framed in the 1940s for the double murder of h... \n",
1341
- "1 Raj is a rich, carefree, happy-go-lucky second... \n",
1342
- "2 Spanning the years 1945 to 1955, a chronicle o... \n",
1343
- "3 The true story of how businessman Oskar Schind... \n",
1344
- "4 In the continuing saga of the Corleone crime f... \n",
1345
- "... ... \n",
1346
- "9995 The story follows the adventures of Aang, a yo... \n",
1347
- "9996 The sharks take bite out of the East Coast whe... \n",
1348
- "9997 During World War II, a brave, patriotic Americ... \n",
1349
- "9998 A man named Farmer sets out to rescue his kidn... \n",
1350
- "9999 Seeking justice for his partner’s murder by an... \n",
1351
- "\n",
1352
- "[10000 rows x 3 columns]"
1353
- ]
1354
- },
1355
- "execution_count": 55,
1356
- "metadata": {},
1357
- "output_type": "execute_result"
1358
- }
1359
- ],
1360
- "source": [
1361
- "pickle.load(open('movies_list.pkl','rb'))"
1362
- ]
1363
- },
1364
- {
1365
- "cell_type": "code",
1366
- "execution_count": null,
1367
- "metadata": {},
1368
- "outputs": [],
1369
- "source": []
1370
- }
1371
- ],
1372
- "metadata": {
1373
- "kernelspec": {
1374
- "display_name": "aienv",
1375
- "language": "python",
1376
- "name": "python3"
1377
- },
1378
- "language_info": {
1379
- "codemirror_mode": {
1380
- "name": "ipython",
1381
- "version": 3
1382
- },
1383
- "file_extension": ".py",
1384
- "mimetype": "text/x-python",
1385
- "name": "python",
1386
- "nbconvert_exporter": "python",
1387
- "pygments_lexer": "ipython3",
1388
- "version": "3.10.16"
1389
- }
1390
- },
1391
- "nbformat": 4,
1392
- "nbformat_minor": 2
1393
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Movie-recommendation-system/ssh.py DELETED
@@ -1,15 +0,0 @@
1
- import requests
2
- from requests.packages.urllib3.exceptions import InsecureRequestWarning
3
- requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
4
-
5
def fetch_url(
    url="https://huggingface.co/spaces/Shiva7706/ML_Movie_recommendation_system",
    verify=False,
    timeout=10,
):
    """Send one HTTP GET to ``url`` and print whether the connection worked.

    This is a connectivity probe: it prints the HTTP status code followed by
    a success line, or prints the error if the request raised.

    Args:
        url: Address to request. Defaults to the project's Hugging Face Space.
        verify: Passed to ``requests.get`` as ``verify``. The default stays
            ``False`` so existing callers behave as before.
            NOTE(review): ``False`` disables TLS certificate verification and
            is insecure; pass ``True`` (or a CA bundle path) unless the probe
            is specifically meant to bypass certificate checks.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait forever on an unresponsive host.

    Returns:
        None. All results are reported on stdout.
    """
    try:
        response = requests.get(url, verify=verify, timeout=timeout)
        print(f"Status Code: {response.status_code}")
        # Reached for any HTTP response, including 4xx/5xx: this line reports
        # that the connection itself worked, not that the page is healthy.
        print("Connection successful!")
    except Exception as e:
        # Best-effort probe: report the failure instead of crashing.
        print(f"Error: {e}")


if __name__ == "__main__":
    # Run the probe with its defaults when executed as a script.
    fetch_url()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Movie-recommendation-system/top10K-TMDB-movies.csv DELETED
The diff for this file is too large to render. See raw diff