Update logic.py
logic.py CHANGED
@@ -151,9 +151,74 @@ def GenresEncoder(movie_docs):
             x[i, mapping[genre]] = 1
     return x.to(device)

-
-#-------------------------------------------------------------------------------------------
-
+def weighted_mse_loss(pred, target, weight=None):
+    # MSE in which each example is scaled by the class weight of its target rating.
+    weight = 1. if weight is None else weight[target].to(pred.dtype)
+    return (weight * (pred - target.to(pred.dtype)).pow(2)).mean()
+
+@torch.no_grad()
+def test(model, data):
+    # Report RMSE of the clamped rating predictions on the given split.
+    model.eval()
+    pred = model(data.x_dict, data.edge_index_dict,
+                 data['user', 'movie'].edge_label_index)
+    pred = pred.clamp(min=0, max=5)
+    target = data['user', 'movie'].edge_label.float()
+    rmse = F.mse_loss(pred, target).sqrt()
+    return float(rmse)
+
+def train_step(model, optimizer, train_data, weight):
+    # One full-batch optimization step over the training graph.
+    model.train()
+    optimizer.zero_grad()
+    pred = model(train_data.x_dict, train_data.edge_index_dict,
+                 train_data['user', 'movie'].edge_label_index)
+    target = train_data['user', 'movie'].edge_label
+    loss = weighted_mse_loss(pred, target, weight)
+    loss.backward()
+    optimizer.step()
+    return float(loss)
+
+
+#-------------------------------------------------------------------------------------------
+# SAGE model
+class GNNEncoder(torch.nn.Module):
+    def __init__(self, hidden_channels, out_channels):
+        super().__init__()
+        # these convolutions have been replicated to match the number of edge types
+        self.conv1 = SAGEConv((-1, -1), hidden_channels)
+        self.conv2 = SAGEConv((-1, -1), out_channels)
+
+    def forward(self, x, edge_index):
+        x = self.conv1(x, edge_index).relu()
+        x = self.conv2(x, edge_index)
+        return x
+
+class EdgeDecoder(torch.nn.Module):
+    def __init__(self, hidden_channels):
+        super().__init__()
+        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
+        self.lin2 = Linear(hidden_channels, 1)
+
+    def forward(self, z_dict, edge_label_index):
+        row, col = edge_label_index
+        # concat user and movie embeddings
+        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)
+        # concatenated embeddings passed to linear layer
+        z = self.lin1(z).relu()
+        z = self.lin2(z)
+        return z.view(-1)
+
+class Model(torch.nn.Module):
+    def __init__(self, hidden_channels, metadata):
+        super().__init__()
+        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
+        # convert the homogeneous GraphSAGE encoder into a heterogeneous one
+        self.encoder = to_hetero(self.encoder, metadata, aggr='sum')
+        self.decoder = EdgeDecoder(hidden_channels)
+
+    def forward(self, x_dict, edge_index_dict, edge_label_index):
+        # z_dict contains the dictionary of movie and user embeddings returned from GraphSAGE
+        z_dict = self.encoder(x_dict, edge_index_dict)
+        return self.decoder(z_dict, edge_label_index)
+
+#-------------------------------------------------------------------------------------------
 def make_graph():
     metadata_path = './sampled_movie_dataset/movies_metadata.csv'
     df = pd.read_csv(metadata_path)
@@ -349,7 +414,29 @@ def make_pyg_graph(movie_rec_db):
     )(data)

     return train_data, val_data, test_data
-
+
+
+
+def train(train_data, val_data, test_data):
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    # move the splits to the training device (assumes they arrive on CPU)
+    train_data = train_data.to(device)
+    val_data = val_data.to(device)
+    test_data = test_data.to(device)
+
+    # weight each rating value by its inverse frequency in the training edges
+    weight = torch.bincount(train_data['user', 'movie'].edge_label)
+    weight = weight.max() / weight
+
+    model = Model(hidden_channels=32, metadata=train_data.metadata()).to(device)
+    # lazy modules: run one encoder forward pass to initialize parameter shapes
+    with torch.no_grad():
+        model.encoder(train_data.x_dict, train_data.edge_index_dict)
+
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
+
+    # Train loop
+    for epoch in range(1, 300):
+        loss = train_step(model, optimizer, train_data, weight)
+        train_rmse = test(model, train_data)
+        val_rmse = test(model, val_data)
+        test_rmse = test(model, test_data)
+        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
+              f'Val: {val_rmse:.4f}, Test: {test_rmse:.4f}')
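A minimal usage sketch of how the pieces touched by this commit might be wired together; the exact objects returned by make_graph() and make_pyg_graph() are assumptions based on the hunk headers and are not shown in this change:

# Hypothetical driver wiring, assuming make_graph() builds the movie database object
# that make_pyg_graph() consumes, and that the latter returns the three link-split HeteroData objects.
if __name__ == '__main__':
    movie_rec_db = make_graph()
    train_data, val_data, test_data = make_pyg_graph(movie_rec_db)
    train(train_data, val_data, test_data)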