Polo123 committed on
Commit
64e5385
·
verified ·
1 Parent(s): 97de7f8

Update logic.py

Browse files
Files changed (1) hide show
  1. logic.py +167 -1
logic.py CHANGED
@@ -50,6 +50,63 @@ def remove_movies(m_id):
50
  #print('No Meta data information at:', m_id[idx])
51
  return no_metadata
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  #-------------------------------------------------------------------------------------------
55
 
@@ -98,11 +155,120 @@ def make_graph():
98
  for idx, m in enumerate(m_id):
99
  movie_mappings[m] = idx
100
 
 
 
101
  return movie_mappings
102
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- def load_data_to_ArangoDB():
106
 
107
 
108
 
 
50
  #print('No Meta data information at:', m_id[idx])
51
  return no_metadata
52
 
53
def populate_user_collection(total_users):
    """Bulk-import one document per user into the "Users" collection.

    Reads the module-level ``user_mapping`` (original user id -> mapped id)
    and ``movie_rec_db`` (database handle). Documents are sent with
    ``import_bulk`` in chunks of 50; a trailing partial chunk is sent when
    the last user has been processed.

    Args:
        total_users: Number of entries of ``user_mapping`` (in key order)
            to import.
    """
    chunk_size = 50
    original_ids = list(user_mapping.keys())
    users = movie_rec_db["Users"]
    final_pos = total_users - 1
    pending = []

    for pos in tqdm(range(total_users)):
        raw_id = original_ids[pos]
        pending.append({
            "_id": "Users/" + str(user_mapping[raw_id]),
            "original_id": str(raw_id),
        })

        if len(pending) == chunk_size:
            # A full chunk is ready: send it and start a new one.
            users.import_bulk(pending)
            pending = []
        elif pos == final_pos:
            # Last user reached with a partially filled chunk left over.
            print("Inserting batch the last batch!")
            users.import_bulk(pending)
77
+
78
def create_ratings_graph(user_id, movie_id, ratings):
    """Bulk-import rating edges (Users -> Movie) into the "Ratings" collection.

    Reads the module-level ``movie_rec_db``, ``user_mapping``,
    ``movie_mappings`` and ``no_metadata``. Rows whose movie id appears in
    ``no_metadata`` are skipped. Edges are sent with ``import_bulk`` in
    chunks of 100.

    Args:
        user_id: Indexable sequence of original user ids, one per rating row.
        movie_id: Indexable sequence of original movie ids, aligned with
            ``user_id``.
        ratings: Array of rating values aligned with ``user_id``; its
            ``shape[0]`` gives the number of rows to process.
    """
    BATCH_SIZE = 100
    edge_collection = movie_rec_db["Ratings"]
    batch = []

    for idx in tqdm(range(ratings.shape[0])):
        # Skip edges that point at movies with no metadata.
        if movie_id[idx] in no_metadata:
            print('Removing edges with no metadata', movie_id[idx])
            continue

        mapped_user = str(user_mapping[user_id[idx]])
        mapped_movie = str(movie_mappings[movie_id[idx]])
        batch.append({
            "_id": "Ratings" + "/" + 'user-' + mapped_user + "-r-" + "movie-" + mapped_movie,
            "_from": "Users" + "/" + mapped_user,
            "_to": "Movie" + "/" + mapped_movie,
            "_rating": float(ratings[idx]),
        })

        if len(batch) == BATCH_SIZE:
            edge_collection.import_bulk(batch)
            batch = []

    # Flush after the loop rather than on the "last row" inside it: when the
    # final row is skipped for missing metadata, an in-loop check never fires
    # and the buffered edges would be silently dropped.
    if batch:
        print("Inserting batch the last batch!")
        edge_collection.import_bulk(batch)
109
+
110
 
111
  #-------------------------------------------------------------------------------------------
112
 
 
155
  for idx, m in enumerate(m_id):
156
  movie_mappings[m] = idx
157
 
158
+
159
+
160
  return movie_mappings
161
 
162
 
163
def login_ArangoDB():
    """Request temporary ArangoDB cloud credentials and print them.

    Calls ``oasis.getTempCredentials`` for the "MovieRecommendations"
    tutorial, prints the Web UI URL followed by the username, password and
    database name, and returns the credentials mapping.

    Returns:
        The mapping returned by ``oasis.getTempCredentials``.
    """
    # Get temporary credentials for ArangoDB on cloud.
    login = oasis.getTempCredentials(
        tutorialName="MovieRecommendations",
        credentialProvider="https://tutorials.arangodb.cloud:8529/_db/_system/tutorialDB/tutorialDB",
    )

    # URL to access the ArangoDB Web UI.
    web_ui_url = "https://" + login["hostname"] + ":" + str(login["port"])
    print(web_ui_url)

    for label, key in (
        ("Username", "username"),
        ("Password", "password"),
        ("Database", "dbName"),
    ):
        print(label + ": " + login[key])

    return login
172
+
173
def load_data_to_ArangoDB(login):
    """Connect to ArangoDB and load movies, users and ratings into it.

    Creates the "Movie", "Users" and "Ratings" collections and the
    "movie_rating_graph" graph when they are missing, bulk-imports one
    document per movie that has a row in ``id_map``, then delegates the user
    and rating import to ``populate_user_collection`` and
    ``create_ratings_graph``.

    Relies on module-level names defined outside this function: ``m_id``,
    ``id_map``, ``sampled_md``, ``movie_mappings``, ``ratings_df``,
    ``oasis``, ``tqdm``, ``yaml`` and ``np``.

    Args:
        login: Credentials passed to ``oasis.connect_python_arango`` (for
            example the value returned by ``login_ArangoDB``).

    Returns:
        The database handle returned by ``oasis.connect_python_arango``.
    """
    # NOTE(review): populate_user_collection() and create_ratings_graph()
    # read a module-level ``movie_rec_db``, while this assignment is local.
    # Confirm the global is bound before this function is called.
    movie_rec_db = oasis.connect_python_arango(login)
    if not movie_rec_db.has_collection("Movie"):
        movie_rec_db.create_collection("Movie", replication_factor=3)

    BATCH_SIZE = 128
    movie_collection = movie_rec_db["Movie"]
    batch = []

    # Load movie metadata into ArangoDB's Movie collection.
    for movie_id in tqdm(m_id):
        matches = id_map.loc[id_map['movieId'] == movie_id]
        if matches.size == 0:
            print('No Meta data information at:', movie_id)
            continue

        # Take the first matching row positionally; ``Series[0]`` would be a
        # label lookup on an integer index and only work for index label 0.
        # Column position 1 of id_map is expected to hold the TMDB id.
        tmdb_id = int(matches.iloc[0, 1])
        mapped_id = movie_mappings[movie_id]
        meta_row = sampled_md.loc[sampled_md['id'] == tmdb_id].iloc[0]

        # BaseLoader only builds plain strings/lists/dicts from the text.
        genre_entries = yaml.load(meta_row['genres'], Loader=yaml.BaseLoader)
        poster = meta_row['poster_path']

        batch.append({
            "_id": "Movie/" + str(mapped_id),
            "movieId": movie_id,
            "mapped_movieId": mapped_id,
            "tmdbId": tmdb_id,
            "movie_title": meta_row['title'],
            "description": meta_row['description'],
            "genres": [g['name'] for g in genre_entries],
            "language": meta_row['original_language'],
            # A missing poster shows up as NaN, whose str() is "nan".
            "poster_path": (
                "No poster path available" if str(poster) == "nan" else poster
            ),
        })

        if len(batch) == BATCH_SIZE:
            movie_collection.import_bulk(batch)
            batch = []

    # Flush after the loop: if the final movie has no metadata row, an
    # in-loop "last record" check never fires and the remaining documents
    # would be silently dropped.
    if batch:
        print("Inserting batch the last batch!")
        movie_collection.import_bulk(batch)

    if not movie_rec_db.has_collection("Users"):
        movie_rec_db.create_collection("Users", replication_factor=3)

    total_users = np.unique(ratings_df[['userId']].values.flatten()).shape[0]
    print("Total number of Users:", total_users)
    populate_user_collection(total_users)

    # Create the "Ratings" edge collection if it does not exist.
    if not movie_rec_db.has_collection("Ratings"):
        movie_rec_db.create_collection("Ratings", edge=True, replication_factor=3)

    if not movie_rec_db.has_graph("movie_rating_graph"):
        movie_rec_db.create_graph('movie_rating_graph', smart=True)

    # This returns an API wrapper for the graph created above.
    movie_rating_graph = movie_rec_db.graph("movie_rating_graph")

    # NOTE(review): in python-arango, vertex_collection() appears to only
    # return a wrapper rather than create anything; the vertex collections
    # are presumably registered by the edge definition below. Confirm.
    if not movie_rating_graph.has_vertex_collection("Users"):
        movie_rating_graph.vertex_collection("Users")

    if not movie_rating_graph.has_vertex_collection("Movie"):
        movie_rating_graph.vertex_collection("Movie")

    if not movie_rating_graph.has_edge_definition("Ratings"):
        movie_rating_graph.create_edge_definition(
            edge_collection='Ratings',
            from_vertex_collections=['Users'],
            to_vertex_collections=['Movie'],
        )

    user_id = ratings_df[['userId']].values.flatten()
    movie_id = ratings_df[['movieId']].values.flatten()
    ratings = ratings_df[['rating']].values.flatten()
    create_ratings_graph(user_id, movie_id, ratings)

    return movie_rec_db
271
 
 
272
 
273
 
274