Shiva7706 committed on
Commit
aa6ea28
·
1 Parent(s): 0fd0efa

movie recommendation system

Browse files
Movie-recommendation-system/README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Movie-recommendation-system
2
+ Movie Recommendation System, my first ML project
Movie-recommendation-system/app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pseudo code
2
+
3
+ import streamlit as st
4
+ import pickle
5
+ import requests
6
+
7
# Load the precomputed artifacts the recommender works from.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
# Context managers make sure the file handles are closed after loading.
with open("movies_list.pkl", 'rb') as movies_file:
    movies = pickle.load(movies_file)
with open("similarity.pkl", 'rb') as similarity_file:
    similarity = pickle.load(similarity_file)

# Titles offered in the dropdown; `movies` is indexed by 'title' below and in
# recommend(), so it is presumably a pandas DataFrame -- TODO confirm.
movies_list = movies['title'].values
st.header("Movie Recommender System")
selectvalue = st.selectbox("Select movie from dropdown", movies_list)
12
+
13
def fetch_poster(movie_id):
    """Return the poster image URL for a TMDB movie.

    Requests the movie-details endpoint of the TMDB API and builds a
    ``w500`` image URL from the ``poster_path`` field of the response.

    Args:
        movie_id: TMDB identifier of the movie.

    Returns:
        The full poster URL, or the string ``"placeholder_image_url"`` when
        the request fails or the movie has no poster.
    """
    import os  # local import so this block does not depend on file-level imports

    fallback = "placeholder_image_url"  # Return value when no poster can be resolved
    # Allow the key to be supplied via the environment; the literal is kept
    # only as a backward-compatible default.
    # NOTE(review): a committed API key should be rotated and removed.
    api_key = os.environ.get("TMDB_API_KEY", "8cfe8dff1a6fff88fe27b573ee65c035")
    url = "https://api.themoviedb.org/3/movie/{}".format(movie_id)
    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(
            url,
            params={"api_key": api_key, "language": "en-US"},
            timeout=10,
        )
        response.raise_for_status()
        poster_path = response.json().get('poster_path')
    except requests.exceptions.SSLError:
        # Handle the error gracefully
        st.error("Unable to fetch poster due to SSL verification error")
        return fallback
    except (requests.exceptions.RequestException, ValueError):
        # The exception text is deliberately not shown: it can contain the
        # request URL, which includes the API key.
        st.error(f"Unable to fetch poster for movie ID {movie_id}")
        return fallback

    if not poster_path:
        # The response carried no (or a null) poster_path.
        return fallback
    return "https://image.tmdb.org/t/p/w500/" + poster_path
25
+
26
def recommend(movie):
    """Return the five movies most similar to ``movie`` and their posters.

    Args:
        movie: Title of the selected movie; it must be present in
            ``movies['title']``.

    Returns:
        A tuple ``(titles, posters)`` of two parallel lists holding the
        recommended titles and the values returned by ``fetch_poster``.

    Raises:
        IndexError: If no row of ``movies`` has the given title.
    """
    # NOTE(review): the index label is used as a position into `similarity`
    # and `movies.iloc`, which assumes a default 0..n-1 index -- TODO confirm.
    index = movies[movies['title'] == movie].index[0]

    # Rank every *other* movie by similarity, highest first. The selected
    # movie is excluded explicitly instead of assuming it sorts to rank 0,
    # which does not hold when several rows tie on the top score.
    ranked = sorted(
        (pair for pair in enumerate(similarity[index]) if pair[0] != index),
        key=lambda pair: pair[1],
        reverse=True,
    )

    recommend_movie = []
    recommend_poster = []
    for position, _score in ranked[:5]:
        row = movies.iloc[position]
        recommend_movie.append(row['title'])
        recommend_poster.append(fetch_poster(row['id']))
    return recommend_movie, recommend_poster
36
+
37
# Render the recommendations side by side when the button is pressed.
if st.button("Show Recommend"):
    movie_name, movie_poster = recommend(selectvalue)
    # One column per recommendation. zip() stops at the shortest sequence, so
    # fewer than five results render without an IndexError.
    for col, name, poster in zip(st.columns(5), movie_name, movie_poster):
        with col:
            st.text(name)
            # fetch_poster may hand back a non-URL placeholder; only real
            # URLs are passed to st.image.
            if isinstance(poster, str) and poster.startswith(("http://", "https://")):
                st.image(poster)
            else:
                st.write("No poster available")
Movie-recommendation-system/app_2.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main code
2
+
3
+ import streamlit as st
4
+ import pickle
5
+ import requests
6
+
7
# Load the precomputed artifacts the recommender works from.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
# Context managers make sure the file handles are closed after loading.
with open("movies_list.pkl", 'rb') as movies_file:
    movies = pickle.load(movies_file)
with open("similarity.pkl", 'rb') as similarity_file:
    similarity = pickle.load(similarity_file)

# Titles offered in the dropdown; `movies` is indexed by 'title' below and in
# recommend(), so it is presumably a pandas DataFrame -- TODO confirm.
movies_list = movies['title'].values
st.header("Movie Recommender System")
selectvalue = st.selectbox("Select movie from dropdown", movies_list)
12
+
13
def fetch_poster(movie_id):
    """Return the poster image URL for a TMDB movie, or ``None``.

    Requests the movie-details endpoint of the TMDB API and builds a
    ``w500`` image URL from the ``poster_path`` field of the response.

    Args:
        movie_id: TMDB identifier of the movie.

    Returns:
        The full poster URL, or ``None`` when the request fails or the
        response carries no poster path. In both cases a message is shown
        in the Streamlit UI.
    """
    import os  # local import so this block does not depend on file-level imports

    # Allow the key to be supplied via the environment; the literal is kept
    # only as a backward-compatible default.
    # NOTE(review): a committed API key should be rotated and removed.
    api_key = os.environ.get("TMDB_API_KEY", "8cfe8dff1a6fff88fe27b573ee65c035")
    url = "https://api.themoviedb.org/3/movie/{}".format(movie_id)
    try:
        # Disabling SSL verification for development.
        # NOTE(review): verify=False exposes the request to interception;
        # re-enable certificate verification before deploying.
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(
            url,
            params={"api_key": api_key, "language": "en-US"},
            verify=False,
            timeout=10,
        )
        # .get() tolerates payloads without a poster_path key (e.g. error
        # responses), which then fall through to the warning below.
        poster_path = response.json().get('poster_path')
    except (requests.exceptions.RequestException, ValueError) as e:
        # Only the exception type is shown: the full text can contain the
        # request URL, which includes the API key.
        st.error(f"Error fetching poster: {type(e).__name__}")
        return None

    if poster_path:
        return "https://image.tmdb.org/t/p/w500/" + poster_path
    st.warning(f"No poster found for movie ID {movie_id}")
    return None
29
+
30
def recommend(movie):
    """Return the five movies most similar to ``movie`` and their posters.

    Args:
        movie: Title of the selected movie; it must be present in
            ``movies['title']``.

    Returns:
        A tuple ``(titles, posters)`` of two parallel lists. Each poster is
        whatever ``fetch_poster`` returned for that movie.

    Raises:
        IndexError: If no row of ``movies`` has the given title.
    """
    # NOTE(review): the index label is used as a position into `similarity`
    # and `movies.iloc`, which assumes a default 0..n-1 index -- TODO confirm.
    index = movies[movies['title'] == movie].index[0]

    # Rank every *other* movie by similarity, highest first. The selected
    # movie is excluded explicitly instead of assuming it sorts to rank 0,
    # which does not hold when several rows tie on the top score.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity[index])
        if position != index
    ]
    candidates.sort(key=lambda item: item[1], reverse=True)

    recommend_movie = []
    recommend_poster = []
    for position, _score in candidates[:5]:
        row = movies.iloc[position]
        recommend_movie.append(row['title'])
        recommend_poster.append(fetch_poster(row['id']))
    return recommend_movie, recommend_poster
41
+
42
# Render the recommendations side by side when the button is pressed.
if st.button("Show Recommend"):
    movie_name, movie_poster = recommend(selectvalue)
    cols = st.columns(5)
    # zip() pairs each column with one recommendation and stops at the
    # shortest sequence; no positional index is needed.
    for col, name, poster in zip(cols, movie_name, movie_poster):
        with col:
            st.text(name)
            if poster:  # showing image if url exists
                st.image(poster)
            else:
                st.write("No poster available")
Movie-recommendation-system/main.ipynb ADDED
@@ -0,0 +1,1393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "movies = pd.read_csv('top10K-TMDB-movies.csv')"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 3,
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "data": {
28
+ "text/html": [
29
+ "<div>\n",
30
+ "<style scoped>\n",
31
+ " .dataframe tbody tr th:only-of-type {\n",
32
+ " vertical-align: middle;\n",
33
+ " }\n",
34
+ "\n",
35
+ " .dataframe tbody tr th {\n",
36
+ " vertical-align: top;\n",
37
+ " }\n",
38
+ "\n",
39
+ " .dataframe thead th {\n",
40
+ " text-align: right;\n",
41
+ " }\n",
42
+ "</style>\n",
43
+ "<table border=\"1\" class=\"dataframe\">\n",
44
+ " <thead>\n",
45
+ " <tr style=\"text-align: right;\">\n",
46
+ " <th></th>\n",
47
+ " <th>id</th>\n",
48
+ " <th>title</th>\n",
49
+ " <th>genre</th>\n",
50
+ " <th>original_language</th>\n",
51
+ " <th>overview</th>\n",
52
+ " <th>popularity</th>\n",
53
+ " <th>release_date</th>\n",
54
+ " <th>vote_average</th>\n",
55
+ " <th>vote_count</th>\n",
56
+ " </tr>\n",
57
+ " </thead>\n",
58
+ " <tbody>\n",
59
+ " <tr>\n",
60
+ " <th>0</th>\n",
61
+ " <td>278</td>\n",
62
+ " <td>The Shawshank Redemption</td>\n",
63
+ " <td>Drama,Crime</td>\n",
64
+ " <td>en</td>\n",
65
+ " <td>Framed in the 1940s for the double murder of h...</td>\n",
66
+ " <td>94.075</td>\n",
67
+ " <td>1994-09-23</td>\n",
68
+ " <td>8.7</td>\n",
69
+ " <td>21862</td>\n",
70
+ " </tr>\n",
71
+ " <tr>\n",
72
+ " <th>1</th>\n",
73
+ " <td>19404</td>\n",
74
+ " <td>Dilwale Dulhania Le Jayenge</td>\n",
75
+ " <td>Comedy,Drama,Romance</td>\n",
76
+ " <td>hi</td>\n",
77
+ " <td>Raj is a rich, carefree, happy-go-lucky second...</td>\n",
78
+ " <td>25.408</td>\n",
79
+ " <td>1995-10-19</td>\n",
80
+ " <td>8.7</td>\n",
81
+ " <td>3731</td>\n",
82
+ " </tr>\n",
83
+ " <tr>\n",
84
+ " <th>2</th>\n",
85
+ " <td>238</td>\n",
86
+ " <td>The Godfather</td>\n",
87
+ " <td>Drama,Crime</td>\n",
88
+ " <td>en</td>\n",
89
+ " <td>Spanning the years 1945 to 1955, a chronicle o...</td>\n",
90
+ " <td>90.585</td>\n",
91
+ " <td>1972-03-14</td>\n",
92
+ " <td>8.7</td>\n",
93
+ " <td>16280</td>\n",
94
+ " </tr>\n",
95
+ " <tr>\n",
96
+ " <th>3</th>\n",
97
+ " <td>424</td>\n",
98
+ " <td>Schindler's List</td>\n",
99
+ " <td>Drama,History,War</td>\n",
100
+ " <td>en</td>\n",
101
+ " <td>The true story of how businessman Oskar Schind...</td>\n",
102
+ " <td>44.761</td>\n",
103
+ " <td>1993-12-15</td>\n",
104
+ " <td>8.6</td>\n",
105
+ " <td>12959</td>\n",
106
+ " </tr>\n",
107
+ " <tr>\n",
108
+ " <th>4</th>\n",
109
+ " <td>240</td>\n",
110
+ " <td>The Godfather: Part II</td>\n",
111
+ " <td>Drama,Crime</td>\n",
112
+ " <td>en</td>\n",
113
+ " <td>In the continuing saga of the Corleone crime f...</td>\n",
114
+ " <td>57.749</td>\n",
115
+ " <td>1974-12-20</td>\n",
116
+ " <td>8.6</td>\n",
117
+ " <td>9811</td>\n",
118
+ " </tr>\n",
119
+ " <tr>\n",
120
+ " <th>5</th>\n",
121
+ " <td>667257</td>\n",
122
+ " <td>Impossible Things</td>\n",
123
+ " <td>Family,Drama</td>\n",
124
+ " <td>es</td>\n",
125
+ " <td>Matilde is a woman who, after the death of her...</td>\n",
126
+ " <td>14.358</td>\n",
127
+ " <td>2021-06-17</td>\n",
128
+ " <td>8.6</td>\n",
129
+ " <td>255</td>\n",
130
+ " </tr>\n",
131
+ " <tr>\n",
132
+ " <th>6</th>\n",
133
+ " <td>129</td>\n",
134
+ " <td>Spirited Away</td>\n",
135
+ " <td>Animation,Family,Fantasy</td>\n",
136
+ " <td>ja</td>\n",
137
+ " <td>A young girl, Chihiro, becomes trapped in a st...</td>\n",
138
+ " <td>92.056</td>\n",
139
+ " <td>2001-07-20</td>\n",
140
+ " <td>8.5</td>\n",
141
+ " <td>13093</td>\n",
142
+ " </tr>\n",
143
+ " <tr>\n",
144
+ " <th>7</th>\n",
145
+ " <td>730154</td>\n",
146
+ " <td>Your Eyes Tell</td>\n",
147
+ " <td>Romance,Drama</td>\n",
148
+ " <td>ja</td>\n",
149
+ " <td>A tragic accident lead to Kaori's blindness, b...</td>\n",
150
+ " <td>51.345</td>\n",
151
+ " <td>2020-10-23</td>\n",
152
+ " <td>8.5</td>\n",
153
+ " <td>339</td>\n",
154
+ " </tr>\n",
155
+ " <tr>\n",
156
+ " <th>8</th>\n",
157
+ " <td>372754</td>\n",
158
+ " <td>Dou kyu sei – Classmates</td>\n",
159
+ " <td>Romance,Animation</td>\n",
160
+ " <td>ja</td>\n",
161
+ " <td>Rihito Sajo, an honor student with a perfect s...</td>\n",
162
+ " <td>14.285</td>\n",
163
+ " <td>2016-02-20</td>\n",
164
+ " <td>8.5</td>\n",
165
+ " <td>239</td>\n",
166
+ " </tr>\n",
167
+ " <tr>\n",
168
+ " <th>9</th>\n",
169
+ " <td>372058</td>\n",
170
+ " <td>Your Name.</td>\n",
171
+ " <td>Romance,Animation,Drama</td>\n",
172
+ " <td>ja</td>\n",
173
+ " <td>High schoolers Mitsuha and Taki are complete s...</td>\n",
174
+ " <td>158.270</td>\n",
175
+ " <td>2016-08-26</td>\n",
176
+ " <td>8.5</td>\n",
177
+ " <td>8895</td>\n",
178
+ " </tr>\n",
179
+ " </tbody>\n",
180
+ "</table>\n",
181
+ "</div>"
182
+ ],
183
+ "text/plain": [
184
+ " id title genre \\\n",
185
+ "0 278 The Shawshank Redemption Drama,Crime \n",
186
+ "1 19404 Dilwale Dulhania Le Jayenge Comedy,Drama,Romance \n",
187
+ "2 238 The Godfather Drama,Crime \n",
188
+ "3 424 Schindler's List Drama,History,War \n",
189
+ "4 240 The Godfather: Part II Drama,Crime \n",
190
+ "5 667257 Impossible Things Family,Drama \n",
191
+ "6 129 Spirited Away Animation,Family,Fantasy \n",
192
+ "7 730154 Your Eyes Tell Romance,Drama \n",
193
+ "8 372754 Dou kyu sei – Classmates Romance,Animation \n",
194
+ "9 372058 Your Name. Romance,Animation,Drama \n",
195
+ "\n",
196
+ " original_language overview \\\n",
197
+ "0 en Framed in the 1940s for the double murder of h... \n",
198
+ "1 hi Raj is a rich, carefree, happy-go-lucky second... \n",
199
+ "2 en Spanning the years 1945 to 1955, a chronicle o... \n",
200
+ "3 en The true story of how businessman Oskar Schind... \n",
201
+ "4 en In the continuing saga of the Corleone crime f... \n",
202
+ "5 es Matilde is a woman who, after the death of her... \n",
203
+ "6 ja A young girl, Chihiro, becomes trapped in a st... \n",
204
+ "7 ja A tragic accident lead to Kaori's blindness, b... \n",
205
+ "8 ja Rihito Sajo, an honor student with a perfect s... \n",
206
+ "9 ja High schoolers Mitsuha and Taki are complete s... \n",
207
+ "\n",
208
+ " popularity release_date vote_average vote_count \n",
209
+ "0 94.075 1994-09-23 8.7 21862 \n",
210
+ "1 25.408 1995-10-19 8.7 3731 \n",
211
+ "2 90.585 1972-03-14 8.7 16280 \n",
212
+ "3 44.761 1993-12-15 8.6 12959 \n",
213
+ "4 57.749 1974-12-20 8.6 9811 \n",
214
+ "5 14.358 2021-06-17 8.6 255 \n",
215
+ "6 92.056 2001-07-20 8.5 13093 \n",
216
+ "7 51.345 2020-10-23 8.5 339 \n",
217
+ "8 14.285 2016-02-20 8.5 239 \n",
218
+ "9 158.270 2016-08-26 8.5 8895 "
219
+ ]
220
+ },
221
+ "execution_count": 3,
222
+ "metadata": {},
223
+ "output_type": "execute_result"
224
+ }
225
+ ],
226
+ "source": [
227
+ "movies.head(10)"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": 4,
233
+ "metadata": {},
234
+ "outputs": [
235
+ {
236
+ "data": {
237
+ "text/html": [
238
+ "<div>\n",
239
+ "<style scoped>\n",
240
+ " .dataframe tbody tr th:only-of-type {\n",
241
+ " vertical-align: middle;\n",
242
+ " }\n",
243
+ "\n",
244
+ " .dataframe tbody tr th {\n",
245
+ " vertical-align: top;\n",
246
+ " }\n",
247
+ "\n",
248
+ " .dataframe thead th {\n",
249
+ " text-align: right;\n",
250
+ " }\n",
251
+ "</style>\n",
252
+ "<table border=\"1\" class=\"dataframe\">\n",
253
+ " <thead>\n",
254
+ " <tr style=\"text-align: right;\">\n",
255
+ " <th></th>\n",
256
+ " <th>id</th>\n",
257
+ " <th>popularity</th>\n",
258
+ " <th>vote_average</th>\n",
259
+ " <th>vote_count</th>\n",
260
+ " </tr>\n",
261
+ " </thead>\n",
262
+ " <tbody>\n",
263
+ " <tr>\n",
264
+ " <th>count</th>\n",
265
+ " <td>10000.000000</td>\n",
266
+ " <td>10000.000000</td>\n",
267
+ " <td>10000.000000</td>\n",
268
+ " <td>10000.000000</td>\n",
269
+ " </tr>\n",
270
+ " <tr>\n",
271
+ " <th>mean</th>\n",
272
+ " <td>161243.505000</td>\n",
273
+ " <td>34.697267</td>\n",
274
+ " <td>6.621150</td>\n",
275
+ " <td>1547.309400</td>\n",
276
+ " </tr>\n",
277
+ " <tr>\n",
278
+ " <th>std</th>\n",
279
+ " <td>211422.046043</td>\n",
280
+ " <td>211.684175</td>\n",
281
+ " <td>0.766231</td>\n",
282
+ " <td>2648.295789</td>\n",
283
+ " </tr>\n",
284
+ " <tr>\n",
285
+ " <th>min</th>\n",
286
+ " <td>5.000000</td>\n",
287
+ " <td>0.600000</td>\n",
288
+ " <td>4.600000</td>\n",
289
+ " <td>200.000000</td>\n",
290
+ " </tr>\n",
291
+ " <tr>\n",
292
+ " <th>25%</th>\n",
293
+ " <td>10127.750000</td>\n",
294
+ " <td>9.154750</td>\n",
295
+ " <td>6.100000</td>\n",
296
+ " <td>315.000000</td>\n",
297
+ " </tr>\n",
298
+ " <tr>\n",
299
+ " <th>50%</th>\n",
300
+ " <td>30002.500000</td>\n",
301
+ " <td>13.637500</td>\n",
302
+ " <td>6.600000</td>\n",
303
+ " <td>583.500000</td>\n",
304
+ " </tr>\n",
305
+ " <tr>\n",
306
+ " <th>75%</th>\n",
307
+ " <td>310133.500000</td>\n",
308
+ " <td>25.651250</td>\n",
309
+ " <td>7.200000</td>\n",
310
+ " <td>1460.000000</td>\n",
311
+ " </tr>\n",
312
+ " <tr>\n",
313
+ " <th>max</th>\n",
314
+ " <td>934761.000000</td>\n",
315
+ " <td>10436.917000</td>\n",
316
+ " <td>8.700000</td>\n",
317
+ " <td>31917.000000</td>\n",
318
+ " </tr>\n",
319
+ " </tbody>\n",
320
+ "</table>\n",
321
+ "</div>"
322
+ ],
323
+ "text/plain": [
324
+ " id popularity vote_average vote_count\n",
325
+ "count 10000.000000 10000.000000 10000.000000 10000.000000\n",
326
+ "mean 161243.505000 34.697267 6.621150 1547.309400\n",
327
+ "std 211422.046043 211.684175 0.766231 2648.295789\n",
328
+ "min 5.000000 0.600000 4.600000 200.000000\n",
329
+ "25% 10127.750000 9.154750 6.100000 315.000000\n",
330
+ "50% 30002.500000 13.637500 6.600000 583.500000\n",
331
+ "75% 310133.500000 25.651250 7.200000 1460.000000\n",
332
+ "max 934761.000000 10436.917000 8.700000 31917.000000"
333
+ ]
334
+ },
335
+ "execution_count": 4,
336
+ "metadata": {},
337
+ "output_type": "execute_result"
338
+ }
339
+ ],
340
+ "source": [
341
+ "movies.describe()"
342
+ ]
343
+ },
344
+ {
345
+ "cell_type": "code",
346
+ "execution_count": 5,
347
+ "metadata": {},
348
+ "outputs": [
349
+ {
350
+ "name": "stdout",
351
+ "output_type": "stream",
352
+ "text": [
353
+ "<class 'pandas.core.frame.DataFrame'>\n",
354
+ "RangeIndex: 10000 entries, 0 to 9999\n",
355
+ "Data columns (total 9 columns):\n",
356
+ " # Column Non-Null Count Dtype \n",
357
+ "--- ------ -------------- ----- \n",
358
+ " 0 id 10000 non-null int64 \n",
359
+ " 1 title 10000 non-null object \n",
360
+ " 2 genre 9997 non-null object \n",
361
+ " 3 original_language 10000 non-null object \n",
362
+ " 4 overview 9987 non-null object \n",
363
+ " 5 popularity 10000 non-null float64\n",
364
+ " 6 release_date 10000 non-null object \n",
365
+ " 7 vote_average 10000 non-null float64\n",
366
+ " 8 vote_count 10000 non-null int64 \n",
367
+ "dtypes: float64(2), int64(2), object(5)\n",
368
+ "memory usage: 703.2+ KB\n"
369
+ ]
370
+ }
371
+ ],
372
+ "source": [
373
+ "movies.info()"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 6,
379
+ "metadata": {},
380
+ "outputs": [
381
+ {
382
+ "data": {
383
+ "text/plain": [
384
+ "Index(['id', 'title', 'genre', 'original_language', 'overview', 'popularity',\n",
385
+ " 'release_date', 'vote_average', 'vote_count'],\n",
386
+ " dtype='object')"
387
+ ]
388
+ },
389
+ "execution_count": 6,
390
+ "metadata": {},
391
+ "output_type": "execute_result"
392
+ }
393
+ ],
394
+ "source": [
395
+ "movies.columns"
396
+ ]
397
+ },
398
+ {
399
+ "cell_type": "code",
400
+ "execution_count": 36,
401
+ "metadata": {},
402
+ "outputs": [],
403
+ "source": [
404
+ "movies=movies[['id','title','overview','genre']]"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "code",
409
+ "execution_count": 37,
410
+ "metadata": {},
411
+ "outputs": [],
412
+ "source": [
413
+ "movies['tags']=movies['overview']+movies['genre']"
414
+ ]
415
+ },
416
+ {
417
+ "cell_type": "code",
418
+ "execution_count": 38,
419
+ "metadata": {},
420
+ "outputs": [],
421
+ "source": [
422
+ "new_movies = movies.drop(columns=['overview','genre'])"
423
+ ]
424
+ },
425
+ {
426
+ "cell_type": "code",
427
+ "execution_count": 39,
428
+ "metadata": {},
429
+ "outputs": [
430
+ {
431
+ "data": {
432
+ "text/html": [
433
+ "<div>\n",
434
+ "<style scoped>\n",
435
+ " .dataframe tbody tr th:only-of-type {\n",
436
+ " vertical-align: middle;\n",
437
+ " }\n",
438
+ "\n",
439
+ " .dataframe tbody tr th {\n",
440
+ " vertical-align: top;\n",
441
+ " }\n",
442
+ "\n",
443
+ " .dataframe thead th {\n",
444
+ " text-align: right;\n",
445
+ " }\n",
446
+ "</style>\n",
447
+ "<table border=\"1\" class=\"dataframe\">\n",
448
+ " <thead>\n",
449
+ " <tr style=\"text-align: right;\">\n",
450
+ " <th></th>\n",
451
+ " <th>id</th>\n",
452
+ " <th>title</th>\n",
453
+ " <th>tags</th>\n",
454
+ " </tr>\n",
455
+ " </thead>\n",
456
+ " <tbody>\n",
457
+ " <tr>\n",
458
+ " <th>0</th>\n",
459
+ " <td>278</td>\n",
460
+ " <td>The Shawshank Redemption</td>\n",
461
+ " <td>Framed in the 1940s for the double murder of h...</td>\n",
462
+ " </tr>\n",
463
+ " <tr>\n",
464
+ " <th>1</th>\n",
465
+ " <td>19404</td>\n",
466
+ " <td>Dilwale Dulhania Le Jayenge</td>\n",
467
+ " <td>Raj is a rich, carefree, happy-go-lucky second...</td>\n",
468
+ " </tr>\n",
469
+ " <tr>\n",
470
+ " <th>2</th>\n",
471
+ " <td>238</td>\n",
472
+ " <td>The Godfather</td>\n",
473
+ " <td>Spanning the years 1945 to 1955, a chronicle o...</td>\n",
474
+ " </tr>\n",
475
+ " <tr>\n",
476
+ " <th>3</th>\n",
477
+ " <td>424</td>\n",
478
+ " <td>Schindler's List</td>\n",
479
+ " <td>The true story of how businessman Oskar Schind...</td>\n",
480
+ " </tr>\n",
481
+ " <tr>\n",
482
+ " <th>4</th>\n",
483
+ " <td>240</td>\n",
484
+ " <td>The Godfather: Part II</td>\n",
485
+ " <td>In the continuing saga of the Corleone crime f...</td>\n",
486
+ " </tr>\n",
487
+ " <tr>\n",
488
+ " <th>...</th>\n",
489
+ " <td>...</td>\n",
490
+ " <td>...</td>\n",
491
+ " <td>...</td>\n",
492
+ " </tr>\n",
493
+ " <tr>\n",
494
+ " <th>9995</th>\n",
495
+ " <td>10196</td>\n",
496
+ " <td>The Last Airbender</td>\n",
497
+ " <td>The story follows the adventures of Aang, a yo...</td>\n",
498
+ " </tr>\n",
499
+ " <tr>\n",
500
+ " <th>9996</th>\n",
501
+ " <td>331446</td>\n",
502
+ " <td>Sharknado 3: Oh Hell No!</td>\n",
503
+ " <td>The sharks take bite out of the East Coast whe...</td>\n",
504
+ " </tr>\n",
505
+ " <tr>\n",
506
+ " <th>9997</th>\n",
507
+ " <td>13995</td>\n",
508
+ " <td>Captain America</td>\n",
509
+ " <td>During World War II, a brave, patriotic Americ...</td>\n",
510
+ " </tr>\n",
511
+ " <tr>\n",
512
+ " <th>9998</th>\n",
513
+ " <td>2312</td>\n",
514
+ " <td>In the Name of the King: A Dungeon Siege Tale</td>\n",
515
+ " <td>A man named Farmer sets out to rescue his kidn...</td>\n",
516
+ " </tr>\n",
517
+ " <tr>\n",
518
+ " <th>9999</th>\n",
519
+ " <td>455957</td>\n",
520
+ " <td>Domino</td>\n",
521
+ " <td>Seeking justice for his partner’s murder by an...</td>\n",
522
+ " </tr>\n",
523
+ " </tbody>\n",
524
+ "</table>\n",
525
+ "<p>10000 rows × 3 columns</p>\n",
526
+ "</div>"
527
+ ],
528
+ "text/plain": [
529
+ " id title \\\n",
530
+ "0 278 The Shawshank Redemption \n",
531
+ "1 19404 Dilwale Dulhania Le Jayenge \n",
532
+ "2 238 The Godfather \n",
533
+ "3 424 Schindler's List \n",
534
+ "4 240 The Godfather: Part II \n",
535
+ "... ... ... \n",
536
+ "9995 10196 The Last Airbender \n",
537
+ "9996 331446 Sharknado 3: Oh Hell No! \n",
538
+ "9997 13995 Captain America \n",
539
+ "9998 2312 In the Name of the King: A Dungeon Siege Tale \n",
540
+ "9999 455957 Domino \n",
541
+ "\n",
542
+ " tags \n",
543
+ "0 Framed in the 1940s for the double murder of h... \n",
544
+ "1 Raj is a rich, carefree, happy-go-lucky second... \n",
545
+ "2 Spanning the years 1945 to 1955, a chronicle o... \n",
546
+ "3 The true story of how businessman Oskar Schind... \n",
547
+ "4 In the continuing saga of the Corleone crime f... \n",
548
+ "... ... \n",
549
+ "9995 The story follows the adventures of Aang, a yo... \n",
550
+ "9996 The sharks take bite out of the East Coast whe... \n",
551
+ "9997 During World War II, a brave, patriotic Americ... \n",
552
+ "9998 A man named Farmer sets out to rescue his kidn... \n",
553
+ "9999 Seeking justice for his partner’s murder by an... \n",
554
+ "\n",
555
+ "[10000 rows x 3 columns]"
556
+ ]
557
+ },
558
+ "execution_count": 39,
559
+ "metadata": {},
560
+ "output_type": "execute_result"
561
+ }
562
+ ],
563
+ "source": [
564
+ "new_movies"
565
+ ]
566
+ },
567
+ {
568
+ "cell_type": "code",
569
+ "execution_count": 40,
570
+ "metadata": {},
571
+ "outputs": [
572
+ {
573
+ "name": "stdout",
574
+ "output_type": "stream",
575
+ "text": [
576
+ "Requirement already satisfied: scikit-learn in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (1.6.0)\n",
577
+ "Requirement already satisfied: numpy>=1.19.5 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (2.2.1)\n",
578
+ "Requirement already satisfied: scipy>=1.6.0 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (1.14.1)\n",
579
+ "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (1.4.2)\n",
580
+ "Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\shiva\\.conda\\envs\\aienv\\lib\\site-packages (from scikit-learn) (3.5.0)\n",
581
+ "Note: you may need to restart the kernel to use updated packages.\n"
582
+ ]
583
+ }
584
+ ],
585
+ "source": [
586
+ "# Install scikit-learn package\n",
587
+ "%pip install scikit-learn\n",
588
+ "\n",
589
+ "from sklearn.feature_extraction.text import CountVectorizer"
590
+ ]
591
+ },
592
+ {
593
+ "cell_type": "code",
594
+ "execution_count": 41,
595
+ "metadata": {},
596
+ "outputs": [],
597
+ "source": [
598
+ "cv=CountVectorizer(max_features=10000 , stop_words='english')"
599
+ ]
600
+ },
601
+ {
602
+ "cell_type": "code",
603
+ "execution_count": 42,
604
+ "metadata": {},
605
+ "outputs": [
606
+ {
607
+ "data": {
608
+ "text/html": [
609
+ "<style>#sk-container-id-2 {\n",
610
+ " /* Definition of color scheme common for light and dark mode */\n",
611
+ " --sklearn-color-text: #000;\n",
612
+ " --sklearn-color-text-muted: #666;\n",
613
+ " --sklearn-color-line: gray;\n",
614
+ " /* Definition of color scheme for unfitted estimators */\n",
615
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
616
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
617
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
618
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
619
+ " /* Definition of color scheme for fitted estimators */\n",
620
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
621
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
622
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
623
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
624
+ "\n",
625
+ " /* Specific color for light theme */\n",
626
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
627
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
628
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
629
+ " --sklearn-color-icon: #696969;\n",
630
+ "\n",
631
+ " @media (prefers-color-scheme: dark) {\n",
632
+ " /* Redefinition of color scheme for dark theme */\n",
633
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
634
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
635
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
636
+ " --sklearn-color-icon: #878787;\n",
637
+ " }\n",
638
+ "}\n",
639
+ "\n",
640
+ "#sk-container-id-2 {\n",
641
+ " color: var(--sklearn-color-text);\n",
642
+ "}\n",
643
+ "\n",
644
+ "#sk-container-id-2 pre {\n",
645
+ " padding: 0;\n",
646
+ "}\n",
647
+ "\n",
648
+ "#sk-container-id-2 input.sk-hidden--visually {\n",
649
+ " border: 0;\n",
650
+ " clip: rect(1px 1px 1px 1px);\n",
651
+ " clip: rect(1px, 1px, 1px, 1px);\n",
652
+ " height: 1px;\n",
653
+ " margin: -1px;\n",
654
+ " overflow: hidden;\n",
655
+ " padding: 0;\n",
656
+ " position: absolute;\n",
657
+ " width: 1px;\n",
658
+ "}\n",
659
+ "\n",
660
+ "#sk-container-id-2 div.sk-dashed-wrapped {\n",
661
+ " border: 1px dashed var(--sklearn-color-line);\n",
662
+ " margin: 0 0.4em 0.5em 0.4em;\n",
663
+ " box-sizing: border-box;\n",
664
+ " padding-bottom: 0.4em;\n",
665
+ " background-color: var(--sklearn-color-background);\n",
666
+ "}\n",
667
+ "\n",
668
+ "#sk-container-id-2 div.sk-container {\n",
669
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
670
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
671
+ " so we also need the `!important` here to be able to override the\n",
672
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
673
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
674
+ " display: inline-block !important;\n",
675
+ " position: relative;\n",
676
+ "}\n",
677
+ "\n",
678
+ "#sk-container-id-2 div.sk-text-repr-fallback {\n",
679
+ " display: none;\n",
680
+ "}\n",
681
+ "\n",
682
+ "div.sk-parallel-item,\n",
683
+ "div.sk-serial,\n",
684
+ "div.sk-item {\n",
685
+ " /* draw centered vertical line to link estimators */\n",
686
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
687
+ " background-size: 2px 100%;\n",
688
+ " background-repeat: no-repeat;\n",
689
+ " background-position: center center;\n",
690
+ "}\n",
691
+ "\n",
692
+ "/* Parallel-specific style estimator block */\n",
693
+ "\n",
694
+ "#sk-container-id-2 div.sk-parallel-item::after {\n",
695
+ " content: \"\";\n",
696
+ " width: 100%;\n",
697
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
698
+ " flex-grow: 1;\n",
699
+ "}\n",
700
+ "\n",
701
+ "#sk-container-id-2 div.sk-parallel {\n",
702
+ " display: flex;\n",
703
+ " align-items: stretch;\n",
704
+ " justify-content: center;\n",
705
+ " background-color: var(--sklearn-color-background);\n",
706
+ " position: relative;\n",
707
+ "}\n",
708
+ "\n",
709
+ "#sk-container-id-2 div.sk-parallel-item {\n",
710
+ " display: flex;\n",
711
+ " flex-direction: column;\n",
712
+ "}\n",
713
+ "\n",
714
+ "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
715
+ " align-self: flex-end;\n",
716
+ " width: 50%;\n",
717
+ "}\n",
718
+ "\n",
719
+ "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
720
+ " align-self: flex-start;\n",
721
+ " width: 50%;\n",
722
+ "}\n",
723
+ "\n",
724
+ "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
725
+ " width: 0;\n",
726
+ "}\n",
727
+ "\n",
728
+ "/* Serial-specific style estimator block */\n",
729
+ "\n",
730
+ "#sk-container-id-2 div.sk-serial {\n",
731
+ " display: flex;\n",
732
+ " flex-direction: column;\n",
733
+ " align-items: center;\n",
734
+ " background-color: var(--sklearn-color-background);\n",
735
+ " padding-right: 1em;\n",
736
+ " padding-left: 1em;\n",
737
+ "}\n",
738
+ "\n",
739
+ "\n",
740
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
741
+ "clickable and can be expanded/collapsed.\n",
742
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
743
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
744
+ "*/\n",
745
+ "\n",
746
+ "/* Pipeline and ColumnTransformer style (default) */\n",
747
+ "\n",
748
+ "#sk-container-id-2 div.sk-toggleable {\n",
749
+ " /* Default theme specific background. It is overwritten whether we have a\n",
750
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
751
+ " background-color: var(--sklearn-color-background);\n",
752
+ "}\n",
753
+ "\n",
754
+ "/* Toggleable label */\n",
755
+ "#sk-container-id-2 label.sk-toggleable__label {\n",
756
+ " cursor: pointer;\n",
757
+ " display: flex;\n",
758
+ " width: 100%;\n",
759
+ " margin-bottom: 0;\n",
760
+ " padding: 0.5em;\n",
761
+ " box-sizing: border-box;\n",
762
+ " text-align: center;\n",
763
+ " align-items: start;\n",
764
+ " justify-content: space-between;\n",
765
+ " gap: 0.5em;\n",
766
+ "}\n",
767
+ "\n",
768
+ "#sk-container-id-2 label.sk-toggleable__label .caption {\n",
769
+ " font-size: 0.6rem;\n",
770
+ " font-weight: lighter;\n",
771
+ " color: var(--sklearn-color-text-muted);\n",
772
+ "}\n",
773
+ "\n",
774
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
775
+ " /* Arrow on the left of the label */\n",
776
+ " content: \"▸\";\n",
777
+ " float: left;\n",
778
+ " margin-right: 0.25em;\n",
779
+ " color: var(--sklearn-color-icon);\n",
780
+ "}\n",
781
+ "\n",
782
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
783
+ " color: var(--sklearn-color-text);\n",
784
+ "}\n",
785
+ "\n",
786
+ "/* Toggleable content - dropdown */\n",
787
+ "\n",
788
+ "#sk-container-id-2 div.sk-toggleable__content {\n",
789
+ " max-height: 0;\n",
790
+ " max-width: 0;\n",
791
+ " overflow: hidden;\n",
792
+ " text-align: left;\n",
793
+ " /* unfitted */\n",
794
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
795
+ "}\n",
796
+ "\n",
797
+ "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
798
+ " /* fitted */\n",
799
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
800
+ "}\n",
801
+ "\n",
802
+ "#sk-container-id-2 div.sk-toggleable__content pre {\n",
803
+ " margin: 0.2em;\n",
804
+ " border-radius: 0.25em;\n",
805
+ " color: var(--sklearn-color-text);\n",
806
+ " /* unfitted */\n",
807
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
808
+ "}\n",
809
+ "\n",
810
+ "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
811
+ " /* unfitted */\n",
812
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
813
+ "}\n",
814
+ "\n",
815
+ "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
816
+ " /* Expand drop-down */\n",
817
+ " max-height: 200px;\n",
818
+ " max-width: 100%;\n",
819
+ " overflow: auto;\n",
820
+ "}\n",
821
+ "\n",
822
+ "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
823
+ " content: \"▾\";\n",
824
+ "}\n",
825
+ "\n",
826
+ "/* Pipeline/ColumnTransformer-specific style */\n",
827
+ "\n",
828
+ "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
829
+ " color: var(--sklearn-color-text);\n",
830
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
831
+ "}\n",
832
+ "\n",
833
+ "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
834
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
835
+ "}\n",
836
+ "\n",
837
+ "/* Estimator-specific style */\n",
838
+ "\n",
839
+ "/* Colorize estimator box */\n",
840
+ "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
841
+ " /* unfitted */\n",
842
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
843
+ "}\n",
844
+ "\n",
845
+ "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
846
+ " /* fitted */\n",
847
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
848
+ "}\n",
849
+ "\n",
850
+ "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
851
+ "#sk-container-id-2 div.sk-label label {\n",
852
+ " /* The background is the default theme color */\n",
853
+ " color: var(--sklearn-color-text-on-default-background);\n",
854
+ "}\n",
855
+ "\n",
856
+ "/* On hover, darken the color of the background */\n",
857
+ "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
858
+ " color: var(--sklearn-color-text);\n",
859
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
860
+ "}\n",
861
+ "\n",
862
+ "/* Label box, darken color on hover, fitted */\n",
863
+ "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
864
+ " color: var(--sklearn-color-text);\n",
865
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
866
+ "}\n",
867
+ "\n",
868
+ "/* Estimator label */\n",
869
+ "\n",
870
+ "#sk-container-id-2 div.sk-label label {\n",
871
+ " font-family: monospace;\n",
872
+ " font-weight: bold;\n",
873
+ " display: inline-block;\n",
874
+ " line-height: 1.2em;\n",
875
+ "}\n",
876
+ "\n",
877
+ "#sk-container-id-2 div.sk-label-container {\n",
878
+ " text-align: center;\n",
879
+ "}\n",
880
+ "\n",
881
+ "/* Estimator-specific */\n",
882
+ "#sk-container-id-2 div.sk-estimator {\n",
883
+ " font-family: monospace;\n",
884
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
885
+ " border-radius: 0.25em;\n",
886
+ " box-sizing: border-box;\n",
887
+ " margin-bottom: 0.5em;\n",
888
+ " /* unfitted */\n",
889
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
890
+ "}\n",
891
+ "\n",
892
+ "#sk-container-id-2 div.sk-estimator.fitted {\n",
893
+ " /* fitted */\n",
894
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
895
+ "}\n",
896
+ "\n",
897
+ "/* on hover */\n",
898
+ "#sk-container-id-2 div.sk-estimator:hover {\n",
899
+ " /* unfitted */\n",
900
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
901
+ "}\n",
902
+ "\n",
903
+ "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
904
+ " /* fitted */\n",
905
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
906
+ "}\n",
907
+ "\n",
908
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
909
+ "\n",
910
+ "/* Common style for \"i\" and \"?\" */\n",
911
+ "\n",
912
+ ".sk-estimator-doc-link,\n",
913
+ "a:link.sk-estimator-doc-link,\n",
914
+ "a:visited.sk-estimator-doc-link {\n",
915
+ " float: right;\n",
916
+ " font-size: smaller;\n",
917
+ " line-height: 1em;\n",
918
+ " font-family: monospace;\n",
919
+ " background-color: var(--sklearn-color-background);\n",
920
+ " border-radius: 1em;\n",
921
+ " height: 1em;\n",
922
+ " width: 1em;\n",
923
+ " text-decoration: none !important;\n",
924
+ " margin-left: 0.5em;\n",
925
+ " text-align: center;\n",
926
+ " /* unfitted */\n",
927
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
928
+ " color: var(--sklearn-color-unfitted-level-1);\n",
929
+ "}\n",
930
+ "\n",
931
+ ".sk-estimator-doc-link.fitted,\n",
932
+ "a:link.sk-estimator-doc-link.fitted,\n",
933
+ "a:visited.sk-estimator-doc-link.fitted {\n",
934
+ " /* fitted */\n",
935
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
936
+ " color: var(--sklearn-color-fitted-level-1);\n",
937
+ "}\n",
938
+ "\n",
939
+ "/* On hover */\n",
940
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
941
+ ".sk-estimator-doc-link:hover,\n",
942
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
943
+ ".sk-estimator-doc-link:hover {\n",
944
+ " /* unfitted */\n",
945
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
946
+ " color: var(--sklearn-color-background);\n",
947
+ " text-decoration: none;\n",
948
+ "}\n",
949
+ "\n",
950
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
951
+ ".sk-estimator-doc-link.fitted:hover,\n",
952
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
953
+ ".sk-estimator-doc-link.fitted:hover {\n",
954
+ " /* fitted */\n",
955
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
956
+ " color: var(--sklearn-color-background);\n",
957
+ " text-decoration: none;\n",
958
+ "}\n",
959
+ "\n",
960
+ "/* Span, style for the box shown on hovering the info icon */\n",
961
+ ".sk-estimator-doc-link span {\n",
962
+ " display: none;\n",
963
+ " z-index: 9999;\n",
964
+ " position: relative;\n",
965
+ " font-weight: normal;\n",
966
+ " right: .2ex;\n",
967
+ " padding: .5ex;\n",
968
+ " margin: .5ex;\n",
969
+ " width: min-content;\n",
970
+ " min-width: 20ex;\n",
971
+ " max-width: 50ex;\n",
972
+ " color: var(--sklearn-color-text);\n",
973
+ " box-shadow: 2pt 2pt 4pt #999;\n",
974
+ " /* unfitted */\n",
975
+ " background: var(--sklearn-color-unfitted-level-0);\n",
976
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
977
+ "}\n",
978
+ "\n",
979
+ ".sk-estimator-doc-link.fitted span {\n",
980
+ " /* fitted */\n",
981
+ " background: var(--sklearn-color-fitted-level-0);\n",
982
+ " border: var(--sklearn-color-fitted-level-3);\n",
983
+ "}\n",
984
+ "\n",
985
+ ".sk-estimator-doc-link:hover span {\n",
986
+ " display: block;\n",
987
+ "}\n",
988
+ "\n",
989
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
990
+ "\n",
991
+ "#sk-container-id-2 a.estimator_doc_link {\n",
992
+ " float: right;\n",
993
+ " font-size: 1rem;\n",
994
+ " line-height: 1em;\n",
995
+ " font-family: monospace;\n",
996
+ " background-color: var(--sklearn-color-background);\n",
997
+ " border-radius: 1rem;\n",
998
+ " height: 1rem;\n",
999
+ " width: 1rem;\n",
1000
+ " text-decoration: none;\n",
1001
+ " /* unfitted */\n",
1002
+ " color: var(--sklearn-color-unfitted-level-1);\n",
1003
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
1004
+ "}\n",
1005
+ "\n",
1006
+ "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
1007
+ " /* fitted */\n",
1008
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
1009
+ " color: var(--sklearn-color-fitted-level-1);\n",
1010
+ "}\n",
1011
+ "\n",
1012
+ "/* On hover */\n",
1013
+ "#sk-container-id-2 a.estimator_doc_link:hover {\n",
1014
+ " /* unfitted */\n",
1015
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
1016
+ " color: var(--sklearn-color-background);\n",
1017
+ " text-decoration: none;\n",
1018
+ "}\n",
1019
+ "\n",
1020
+ "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
1021
+ " /* fitted */\n",
1022
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
1023
+ "}\n",
1024
+ "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>CountVectorizer(max_features=10000, stop_words=&#x27;english&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\"><div><div>CountVectorizer</div></div><div><a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html\">?<span>Documentation for CountVectorizer</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></div></label><div class=\"sk-toggleable__content \"><pre>CountVectorizer(max_features=10000, stop_words=&#x27;english&#x27;)</pre></div> </div></div></div></div>"
1025
+ ],
1026
+ "text/plain": [
1027
+ "CountVectorizer(max_features=10000, stop_words='english')"
1028
+ ]
1029
+ },
1030
+ "execution_count": 42,
1031
+ "metadata": {},
1032
+ "output_type": "execute_result"
1033
+ }
1034
+ ],
1035
+ "source": [
1036
+ "cv"
1037
+ ]
1038
+ },
1039
+ {
1040
+ "cell_type": "code",
1041
+ "execution_count": 43,
1042
+ "metadata": {},
1043
+ "outputs": [],
1044
+ "source": [
1045
+ "vector = cv.fit_transform(new_movies['tags'].values.astype('U')).toarray()"
1046
+ ]
1047
+ },
1048
+ {
1049
+ "cell_type": "code",
1050
+ "execution_count": 44,
1051
+ "metadata": {},
1052
+ "outputs": [
1053
+ {
1054
+ "data": {
1055
+ "text/plain": [
1056
+ "(10000, 10000)"
1057
+ ]
1058
+ },
1059
+ "execution_count": 44,
1060
+ "metadata": {},
1061
+ "output_type": "execute_result"
1062
+ }
1063
+ ],
1064
+ "source": [
1065
+ "vector.shape"
1066
+ ]
1067
+ },
1068
+ {
1069
+ "cell_type": "code",
1070
+ "execution_count": 45,
1071
+ "metadata": {},
1072
+ "outputs": [],
1073
+ "source": [
1074
+ "from sklearn.metrics.pairwise import cosine_similarity"
1075
+ ]
1076
+ },
1077
+ {
1078
+ "cell_type": "code",
1079
+ "execution_count": 46,
1080
+ "metadata": {},
1081
+ "outputs": [],
1082
+ "source": [
1083
+ "similarity = cosine_similarity(vector)"
1084
+ ]
1085
+ },
1086
+ {
1087
+ "cell_type": "code",
1088
+ "execution_count": 47,
1089
+ "metadata": {},
1090
+ "outputs": [
1091
+ {
1092
+ "data": {
1093
+ "text/plain": [
1094
+ "array([[1. , 0.05634362, 0.13041013, ..., 0.07559289, 0.11065667,\n",
1095
+ " 0.06900656],\n",
1096
+ " [0.05634362, 1. , 0.07715167, ..., 0. , 0.03636965,\n",
1097
+ " 0. ],\n",
1098
+ " [0.13041013, 0.07715167, 1. , ..., 0.02300219, 0.0673435 ,\n",
1099
+ " 0.09449112],\n",
1100
+ " ...,\n",
1101
+ " [0.07559289, 0. , 0.02300219, ..., 1. , 0.03253 ,\n",
1102
+ " 0.03042903],\n",
1103
+ " [0.11065667, 0.03636965, 0.0673435 , ..., 0.03253 , 1. ,\n",
1104
+ " 0.04454354],\n",
1105
+ " [0.06900656, 0. , 0.09449112, ..., 0.03042903, 0.04454354,\n",
1106
+ " 1. ]], shape=(10000, 10000))"
1107
+ ]
1108
+ },
1109
+ "execution_count": 47,
1110
+ "metadata": {},
1111
+ "output_type": "execute_result"
1112
+ }
1113
+ ],
1114
+ "source": [
1115
+ "similarity"
1116
+ ]
1117
+ },
1118
+ {
1119
+ "cell_type": "code",
1120
+ "execution_count": 48,
1121
+ "metadata": {},
1122
+ "outputs": [
1123
+ {
1124
+ "data": {
1125
+ "text/plain": [
1126
+ "np.int64(2)"
1127
+ ]
1128
+ },
1129
+ "execution_count": 48,
1130
+ "metadata": {},
1131
+ "output_type": "execute_result"
1132
+ }
1133
+ ],
1134
+ "source": [
1135
+ "new_movies[new_movies['title']==\"The Godfather\"].index[0]"
1136
+ ]
1137
+ },
1138
+ {
1139
+ "cell_type": "code",
1140
+ "execution_count": 49,
1141
+ "metadata": {},
1142
+ "outputs": [
1143
+ {
1144
+ "name": "stdout",
1145
+ "output_type": "stream",
1146
+ "text": [
1147
+ "The Godfather\n",
1148
+ "The Godfather: Part II\n",
1149
+ "Blood Ties\n",
1150
+ "Joker\n",
1151
+ "Bomb City\n"
1152
+ ]
1153
+ }
1154
+ ],
1155
+ "source": [
1156
+ "distance = sorted(list(enumerate(similarity[2])) , reverse=True, key=lambda vector:vector[1])\n",
1157
+ "for i in distance[0:5]:\n",
1158
+ " print(new_movies.iloc[i[0]].title)"
1159
+ ]
1160
+ },
1161
+ {
1162
+ "cell_type": "code",
1163
+ "execution_count": 50,
1164
+ "metadata": {},
1165
+ "outputs": [],
1166
+ "source": [
1167
+ "def recommend(movies):\n",
1168
+ " index=new_movies[new_movies['title']==movies].index[0]\n",
1169
+ " distance = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda vector:vector[1])\n",
1170
+ " for i in distance[0:5]:\n",
1171
+ " print(new_movies.iloc[i[0]].title)"
1172
+ ]
1173
+ },
1174
+ {
1175
+ "cell_type": "code",
1176
+ "execution_count": 51,
1177
+ "metadata": {},
1178
+ "outputs": [
1179
+ {
1180
+ "name": "stdout",
1181
+ "output_type": "stream",
1182
+ "text": [
1183
+ "Iron Man\n",
1184
+ "Iron Man 3\n",
1185
+ "Guardians of the Galaxy Vol. 2\n",
1186
+ "Avengers: Age of Ultron\n",
1187
+ "Star Wars: Episode III - Revenge of the Sith\n"
1188
+ ]
1189
+ }
1190
+ ],
1191
+ "source": [
1192
+ "recommend(\"Iron Man\")"
1193
+ ]
1194
+ },
1195
+ {
1196
+ "cell_type": "code",
1197
+ "execution_count": 52,
1198
+ "metadata": {},
1199
+ "outputs": [],
1200
+ "source": [
1201
+ "import pickle"
1202
+ ]
1203
+ },
1204
+ {
1205
+ "cell_type": "code",
1206
+ "execution_count": 53,
1207
+ "metadata": {},
1208
+ "outputs": [],
1209
+ "source": [
1210
+ "pickle.dump(new_movies , open('movies_list.pkl' , 'wb'))"
1211
+ ]
1212
+ },
1213
+ {
1214
+ "cell_type": "code",
1215
+ "execution_count": 54,
1216
+ "metadata": {},
1217
+ "outputs": [],
1218
+ "source": [
1219
+ "pickle.dump(similarity,open('similarity.pkl','wb'))"
1220
+ ]
1221
+ },
1222
+ {
1223
+ "cell_type": "code",
1224
+ "execution_count": 55,
1225
+ "metadata": {},
1226
+ "outputs": [
1227
+ {
1228
+ "data": {
1229
+ "text/html": [
1230
+ "<div>\n",
1231
+ "<style scoped>\n",
1232
+ " .dataframe tbody tr th:only-of-type {\n",
1233
+ " vertical-align: middle;\n",
1234
+ " }\n",
1235
+ "\n",
1236
+ " .dataframe tbody tr th {\n",
1237
+ " vertical-align: top;\n",
1238
+ " }\n",
1239
+ "\n",
1240
+ " .dataframe thead th {\n",
1241
+ " text-align: right;\n",
1242
+ " }\n",
1243
+ "</style>\n",
1244
+ "<table border=\"1\" class=\"dataframe\">\n",
1245
+ " <thead>\n",
1246
+ " <tr style=\"text-align: right;\">\n",
1247
+ " <th></th>\n",
1248
+ " <th>id</th>\n",
1249
+ " <th>title</th>\n",
1250
+ " <th>tags</th>\n",
1251
+ " </tr>\n",
1252
+ " </thead>\n",
1253
+ " <tbody>\n",
1254
+ " <tr>\n",
1255
+ " <th>0</th>\n",
1256
+ " <td>278</td>\n",
1257
+ " <td>The Shawshank Redemption</td>\n",
1258
+ " <td>Framed in the 1940s for the double murder of h...</td>\n",
1259
+ " </tr>\n",
1260
+ " <tr>\n",
1261
+ " <th>1</th>\n",
1262
+ " <td>19404</td>\n",
1263
+ " <td>Dilwale Dulhania Le Jayenge</td>\n",
1264
+ " <td>Raj is a rich, carefree, happy-go-lucky second...</td>\n",
1265
+ " </tr>\n",
1266
+ " <tr>\n",
1267
+ " <th>2</th>\n",
1268
+ " <td>238</td>\n",
1269
+ " <td>The Godfather</td>\n",
1270
+ " <td>Spanning the years 1945 to 1955, a chronicle o...</td>\n",
1271
+ " </tr>\n",
1272
+ " <tr>\n",
1273
+ " <th>3</th>\n",
1274
+ " <td>424</td>\n",
1275
+ " <td>Schindler's List</td>\n",
1276
+ " <td>The true story of how businessman Oskar Schind...</td>\n",
1277
+ " </tr>\n",
1278
+ " <tr>\n",
1279
+ " <th>4</th>\n",
1280
+ " <td>240</td>\n",
1281
+ " <td>The Godfather: Part II</td>\n",
1282
+ " <td>In the continuing saga of the Corleone crime f...</td>\n",
1283
+ " </tr>\n",
1284
+ " <tr>\n",
1285
+ " <th>...</th>\n",
1286
+ " <td>...</td>\n",
1287
+ " <td>...</td>\n",
1288
+ " <td>...</td>\n",
1289
+ " </tr>\n",
1290
+ " <tr>\n",
1291
+ " <th>9995</th>\n",
1292
+ " <td>10196</td>\n",
1293
+ " <td>The Last Airbender</td>\n",
1294
+ " <td>The story follows the adventures of Aang, a yo...</td>\n",
1295
+ " </tr>\n",
1296
+ " <tr>\n",
1297
+ " <th>9996</th>\n",
1298
+ " <td>331446</td>\n",
1299
+ " <td>Sharknado 3: Oh Hell No!</td>\n",
1300
+ " <td>The sharks take bite out of the East Coast whe...</td>\n",
1301
+ " </tr>\n",
1302
+ " <tr>\n",
1303
+ " <th>9997</th>\n",
1304
+ " <td>13995</td>\n",
1305
+ " <td>Captain America</td>\n",
1306
+ " <td>During World War II, a brave, patriotic Americ...</td>\n",
1307
+ " </tr>\n",
1308
+ " <tr>\n",
1309
+ " <th>9998</th>\n",
1310
+ " <td>2312</td>\n",
1311
+ " <td>In the Name of the King: A Dungeon Siege Tale</td>\n",
1312
+ " <td>A man named Farmer sets out to rescue his kidn...</td>\n",
1313
+ " </tr>\n",
1314
+ " <tr>\n",
1315
+ " <th>9999</th>\n",
1316
+ " <td>455957</td>\n",
1317
+ " <td>Domino</td>\n",
1318
+ " <td>Seeking justice for his partner’s murder by an...</td>\n",
1319
+ " </tr>\n",
1320
+ " </tbody>\n",
1321
+ "</table>\n",
1322
+ "<p>10000 rows × 3 columns</p>\n",
1323
+ "</div>"
1324
+ ],
1325
+ "text/plain": [
1326
+ " id title \\\n",
1327
+ "0 278 The Shawshank Redemption \n",
1328
+ "1 19404 Dilwale Dulhania Le Jayenge \n",
1329
+ "2 238 The Godfather \n",
1330
+ "3 424 Schindler's List \n",
1331
+ "4 240 The Godfather: Part II \n",
1332
+ "... ... ... \n",
1333
+ "9995 10196 The Last Airbender \n",
1334
+ "9996 331446 Sharknado 3: Oh Hell No! \n",
1335
+ "9997 13995 Captain America \n",
1336
+ "9998 2312 In the Name of the King: A Dungeon Siege Tale \n",
1337
+ "9999 455957 Domino \n",
1338
+ "\n",
1339
+ " tags \n",
1340
+ "0 Framed in the 1940s for the double murder of h... \n",
1341
+ "1 Raj is a rich, carefree, happy-go-lucky second... \n",
1342
+ "2 Spanning the years 1945 to 1955, a chronicle o... \n",
1343
+ "3 The true story of how businessman Oskar Schind... \n",
1344
+ "4 In the continuing saga of the Corleone crime f... \n",
1345
+ "... ... \n",
1346
+ "9995 The story follows the adventures of Aang, a yo... \n",
1347
+ "9996 The sharks take bite out of the East Coast whe... \n",
1348
+ "9997 During World War II, a brave, patriotic Americ... \n",
1349
+ "9998 A man named Farmer sets out to rescue his kidn... \n",
1350
+ "9999 Seeking justice for his partner’s murder by an... \n",
1351
+ "\n",
1352
+ "[10000 rows x 3 columns]"
1353
+ ]
1354
+ },
1355
+ "execution_count": 55,
1356
+ "metadata": {},
1357
+ "output_type": "execute_result"
1358
+ }
1359
+ ],
1360
+ "source": [
1361
+ "pickle.load(open('movies_list.pkl','rb'))"
1362
+ ]
1363
+ },
1364
+ {
1365
+ "cell_type": "code",
1366
+ "execution_count": null,
1367
+ "metadata": {},
1368
+ "outputs": [],
1369
+ "source": []
1370
+ }
1371
+ ],
1372
+ "metadata": {
1373
+ "kernelspec": {
1374
+ "display_name": "aienv",
1375
+ "language": "python",
1376
+ "name": "python3"
1377
+ },
1378
+ "language_info": {
1379
+ "codemirror_mode": {
1380
+ "name": "ipython",
1381
+ "version": 3
1382
+ },
1383
+ "file_extension": ".py",
1384
+ "mimetype": "text/x-python",
1385
+ "name": "python",
1386
+ "nbconvert_exporter": "python",
1387
+ "pygments_lexer": "ipython3",
1388
+ "version": "3.10.16"
1389
+ }
1390
+ },
1391
+ "nbformat": 4,
1392
+ "nbformat_minor": 2
1393
+ }
Movie-recommendation-system/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ seaborn
4
+ matplotlib
5
+ scikit-learn
6
+ streamlit
7
+ requests
Movie-recommendation-system/ssh.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Connectivity smoke test for the deployed Hugging Face Space."""

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# fetch_url() skips certificate verification by default, so silence the
# InsecureRequestWarning that urllib3 would otherwise print for each request.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

SPACE_URL = "https://huggingface.co/spaces/Shiva7706/ML_Movie_recommendation_system"


def fetch_url(url=SPACE_URL, *, verify=False, timeout=10):
    """Issue a GET request to *url* and print the outcome.

    Args:
        url: Address to probe. Defaults to the project's Space page.
        verify: Passed straight to ``requests.get``. The default ``False``
            keeps the script's original behaviour.
            NOTE(review): ``verify=False`` disables TLS certificate
            validation; pass ``verify=True`` for anything other than a
            throwaway diagnostic run.
        timeout: Seconds to wait before giving up. Without a timeout
            ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        None. The status code (or the error) is printed to stdout.
    """
    try:
        response = requests.get(url, verify=verify, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Covers connection, SSL, timeout and malformed-URL failures.
        print(f"Error: {e}")
        return

    print(f"Status Code: {response.status_code}")
    print("Connection successful!")


if __name__ == "__main__":
    fetch_url()
Movie-recommendation-system/top10K-TMDB-movies.csv ADDED
The diff for this file is too large to render. See raw diff