File size: 4,288 Bytes
4c8efc5 7cf9593 dd1e684 4c8efc5 03164f7 4c8efc5 03164f7 4c8efc5 5a1f34c 4c8efc5 03164f7 4f0ec5a 5a1f34c 03164f7 4f0ec5a 50861f7 03164f7 2e4da47 fe189c2 2e4da47 fe189c2 2e4da47 dd1e684 4c8efc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import pandas as pd
from tqdm import tqdm
import numpy as np
import itertools
import requests
import sys
from pyvis.network import Network
import torch
import torch.nn.functional as F
from torch.nn import Linear
from arango import ArangoClient
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv, to_hetero
from torch_geometric.transforms import RandomLinkSplit, ToUndirected
from sentence_transformers import SentenceTransformer
from torch_geometric.data import HeteroData
import yaml
import pickle
#----------------------------------------------
# SAGE model
class GNNEncoder(torch.nn.Module):
    """Two-layer GraphSAGE encoder that turns node features into embeddings.

    Both convolutions are created with ``(-1, -1)`` as their input size, so
    the actual input dimensions are not fixed here.

    Args:
        hidden_channels: Output width of the first convolution.
        out_channels: Output width of the second convolution, i.e. the size
            of the embeddings returned by ``forward``.
    """

    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        lazy_in_channels = (-1, -1)
        # Written for a single edge type; presumably replicated once per edge
        # type when this module is passed through `to_hetero`.
        self.conv1 = SAGEConv(lazy_in_channels, hidden_channels)
        self.conv2 = SAGEConv(lazy_in_channels, out_channels)

    def forward(self, x, edge_index):
        """Run conv1, apply ReLU, then run conv2 and return its output."""
        hidden = self.conv1(x, edge_index)
        hidden = hidden.relu()
        return self.conv2(hidden, edge_index)
class EdgeDecoder(torch.nn.Module):
    """Small MLP that scores (user, movie) pairs from their embeddings.

    Args:
        hidden_channels: Width of each node embedding and of the hidden
            layer of the MLP.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # The input is a user embedding concatenated with a movie embedding,
        # hence twice the embedding width.
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        """Return one score per column of ``edge_label_index``.

        Args:
            z_dict: Mapping with ``'user'`` and ``'movie'`` embedding tensors.
            edge_label_index: Two-row index tensor; the first row indexes
                users and the second row indexes movies.

        Returns:
            A flat (1-D) tensor of scores.
        """
        user_idx, movie_idx = edge_label_index
        user_emb = z_dict['user'][user_idx]
        movie_emb = z_dict['movie'][movie_idx]
        # Pair features: user embedding followed by movie embedding.
        pair_features = torch.cat([user_emb, movie_emb], dim=-1)
        hidden = self.lin1(pair_features).relu()
        scores = self.lin2(hidden)
        return scores.view(-1)
class Model(torch.nn.Module):
    """Link-prediction model: heterogeneous GraphSAGE encoder + edge decoder.

    Args:
        hidden_channels: Embedding width used by both the encoder and the
            decoder.
        metadata: Optional graph metadata passed to ``to_hetero`` (the value
            returned by ``data.metadata()``). When omitted, the metadata of
            the module-level ``data`` object is used, which preserves the
            original behaviour; in that case ``data`` must already exist
            (e.g. after ``load_hetero_data()``), otherwise a ``NameError``
            is raised.
    """

    def __init__(self, hidden_channels, metadata=None):
        super().__init__()
        if metadata is None:
            # Backward-compatible fallback: historically this constructor
            # always read the module-level global `data`.
            metadata = data.metadata()
        # Convert the homogeneous encoder into a heterogeneous one; messages
        # arriving from different edge types are combined with 'sum'.
        self.encoder = to_hetero(
            GNNEncoder(hidden_channels, hidden_channels),
            metadata,
            aggr='sum',
        )
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        """Encode all nodes, then score the requested (user, movie) pairs.

        Args:
            x_dict: Node features keyed by node type.
            edge_index_dict: Edge indices keyed by edge type.
            edge_label_index: Two-row tensor of (user, movie) index pairs to
                score.

        Returns:
            The decoder output: one score per requested pair.
        """
        # z_dict holds the per-node-type embeddings produced by the encoder
        # (the decoder reads its 'user' and 'movie' entries).
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)
#----------------------------------------------
def load_hetero_data():
    """Unpickle the graph stored in ``Hgraph.pkl`` and return it.

    Side effect: the loaded object is also bound to the module-level name
    ``data``.

    NOTE: ``pickle.load`` can execute arbitrary code; only use this with a
    trusted ``Hgraph.pkl``.
    """
    global data
    with open('Hgraph.pkl', 'rb') as graph_file:
        data = pickle.load(graph_file)
    return data
def load_model(data):
    """Build the model, load weights from ``model.pt`` and switch to eval mode.

    Args:
        data: Graph object exposing ``x_dict`` and ``edge_index_dict``; used
            for one warm-up pass through the encoder before the weights are
            loaded.

    Returns:
        The ``Model`` instance with the loaded state dict, in eval mode.
    """
    net = Model(hidden_channels=32)

    # Run the encoder once without gradients before loading the checkpoint;
    # presumably this materialises the lazily-sized (-1, -1) layers so that
    # the state dict shapes can be matched.
    with torch.no_grad():
        net.encoder(data.x_dict, data.edge_index_dict)

    # map_location forces tensors in the checkpoint onto the CPU.
    cpu = torch.device('cpu')
    state = torch.load('model.pt', map_location=cpu)
    net.load_state_dict(state)
    net.eval()
    return net
# Lookup tables read by get_movie(), loaded once at import time.
# (A `global` statement is a no-op at module scope, so plain assignments
# are used here.)
# NOTE: pickle.load can execute arbitrary code; only use trusted files.
with open('id_map.pkl', 'rb') as file:
    id_map = pickle.load(file)

with open('m_id.pkl', 'rb') as file:
    m_id = pickle.load(file)
def get_movie(idx):
    """Return the index labels of the ``id_map`` rows for movie ``idx``.

    ``idx`` is first translated through ``m_id``; rows of ``id_map`` whose
    ``'movieId'`` equals that value are selected and their index is returned
    (presumably the movie title(s) — TODO confirm against how ``id_map`` is
    built).
    """
    movie_id_column = id_map['movieId']
    row_mask = movie_id_column == m_id[idx]
    return id_map.loc[row_mask].index
def get_recommendation(model, data, user_id, *, total_movies=9025):
    """Recommend up to ten movies for ``user_id``.

    The user is paired with every movie index in ``range(total_movies)``,
    the model scores each pair, scores are clamped to ``[0, 5]`` and the
    first ten movie indices (in index order) whose clamped score equals 5
    are returned.

    Args:
        model: Callable taking ``(x_dict, edge_index_dict, edge_label_index)``
            and returning one score per candidate pair.
        data: Graph object exposing ``x_dict`` and ``edge_index_dict``.
        user_id: Index of the user to recommend for.
        total_movies: Number of candidate movie indices to score. Defaults
            to 9025, the value that was previously hard-coded.

    Returns:
        ``{'user': user_id, 'rec_movies': [...]}`` where each list entry is
        the result of ``get_movie`` for one recommended movie index.
    """
    # Build every (user, movie) candidate pair: row 0 repeats the user,
    # row 1 enumerates the movie indices.
    user_row = torch.tensor([user_id] * total_movies, dtype=torch.long)
    all_movie_ids = torch.arange(total_movies)
    edge_label_index = torch.stack([user_row, all_movie_ids], dim=0)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        pred = model(data.x_dict, data.edge_index_dict, edge_label_index)
        pred = pred.clamp(min=0, max=5)
        # Only keep movies whose predicted rating reaches the maximum (5
        # after clamping). NOTE: these are the first ten by index, not the
        # ten with the highest raw score.
        rec_movie_idxs = (pred == 5).nonzero(as_tuple=True)[0][:10].tolist()

    rec_movies = [get_movie(movie_idx) for movie_idx in rec_movie_idxs]
    return {'user': user_id, 'rec_movies': rec_movies}
def make_1_hop_graph(data, user_id, *, movie_id_offset=671):
    """Render the movies rated by ``user_id`` as a pyvis graph in ``index.html``.

    The user is drawn as a red node; each rated movie becomes a node linked
    to the user by an edge whose ``title`` is the rating.

    Args:
        data: Graph object indexable by ``("user", "rates", "movie")`` whose
            entry exposes ``edge_index`` and ``edge_label``.
        user_id: Index of the user whose ratings are drawn.
        movie_id_offset: Added to every movie index so that movie node ids
            do not collide with user ids. Defaults to 671, the value that
            was previously hard-coded (presumably the number of users —
            TODO confirm).

    Returns:
        None. The graph is written/shown via ``net.show('index.html')``.
    """
    rates = data["user", "rates", "movie"]
    edge_index = rates.edge_index
    edge_label = rates.edge_label

    # Positions of the edges that start at this user.
    edge_positions = (edge_index[0] == user_id).nonzero(as_tuple=True)[0]
    ratings = edge_label[edge_positions]
    movie_idxs = edge_index[1][edge_positions]

    net = Network(cdn_resources="local", filter_menu=True)
    # The user node is the same for every edge, so add it once up front
    # (a user without ratings is therefore shown as a single node).
    net.add_node(user_id, label=str(user_id), color='#FF0000')

    for movie_idx, rating in zip(movie_idxs, ratings):
        # NOTE(review): `get_title` is not defined in the visible part of
        # this file — confirm it exists (or whether `get_movie` was meant).
        title = get_title(movie_idx)[0]
        # Offset separates movie node ids from user node ids.
        movie_node = movie_idx.item() + movie_id_offset
        net.add_node(movie_node, label=title)
        net.add_edge(user_id, movie_node, title=rating.item())

    net.show('index.html')
|