Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

Jan Mühlnikel committed on May 26, 2024

Commit

a88ee3f

1 Parent(s): 2a6aea4

experiment

Files changed (1) hide show

functions/calc_matches.py CHANGED Viewed

@@ -22,25 +22,32 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
     project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
     # Select submatrix based on indices from both dataframes
-    #match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
-    match_matrix = similarity_matrix[np.ix_(filtered_df_indices, project_df_indices)]
     st.write(match_matrix.shape)
     # Get the linear indices of the top 'top_x' values
     # (flattened index to handle the sparse matrix more effectively)
-    linear_indices = np.argsort(match_matrix.data)[-top_x:]
-    if len(linear_indices) < top_x:
-        top_x = len(linear_indices)
     # Convert flat indices to 2D indices using the shape of the submatrix
-    top_indices = np.unravel_index(linear_indices, match_matrix.shape)
     # Get the corresponding similarity values
-    top_values = match_matrix.data[linear_indices]
-    top_filtered_df_indices = [filtered_df_index_map[i] for i in top_indices[0]]
-    top_project_df_indices = [project_df_index_map[i] for i in top_indices[1]]
     st.write(top_filtered_df_indices)

     project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
     # Select submatrix based on indices from both dataframes
+    match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
     st.write(match_matrix.shape)
     # Get the linear indices of the top 'top_x' values
     # (flattened index to handle the sparse matrix more effectively)
+    #linear_indices = np.argsort(match_matrix.data)[-top_x:]
+    #if len(linear_indices) < top_x:
+    #    top_x = len(linear_indices)
     # Convert flat indices to 2D indices using the shape of the submatrix
+    #top_indices = np.unravel_index(linear_indices, match_matrix.shape)
     # Get the corresponding similarity values
+    #top_values = match_matrix.data[linear_indices]
+    flat_indices = np.argpartition(match_matrix.flatten(), -3)[-3:]
+    # Convert flat indices to 2D row and column indices
+    row_indices, col_indices = np.unravel_index(flat_indices, match_matrix.shape)
+    # Get the values corresponding to the top k indices
+    top_values = match_matrix[row_indices, col_indices]
+    top_filtered_df_indices = [filtered_df_index_map[i] for i in col_indices]
+    top_project_df_indices = [project_df_index_map[i] for i in row_indices]
     st.write(top_filtered_df_indices)