Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

Jan Mühlnikel committed on May 26, 2024

Commit

3d9250a

1 Parent(s): b188b37

experiment

Files changed (1) hide show

functions/calc_matches.py CHANGED Viewed

@@ -10,8 +10,8 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
     st.write(similarity_matrix.shape)
     # Ensure the matrix is in a suitable format for manipulation
-    #if not isinstance(similarity_matrix, csr_matrix):
-    #    similarity_matrix = csr_matrix(similarity_matrix)
     # Get indices from dataframes
     filtered_df_indices = filtered_df.index.to_list()
@@ -38,10 +38,22 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
     # Get the corresponding similarity values
     #top_values = match_matrix.data[linear_indices]
-    flat_indices = np.argpartition(match_matrix.flatten(), -3)[-3:]
     # Convert flat indices to 2D row and column indices
-    row_indices, col_indices = np.unravel_index(flat_indices, match_matrix.shape)
     # Get the values corresponding to the top k indices
     top_values = match_matrix[row_indices, col_indices]

     st.write(similarity_matrix.shape)
     # Ensure the matrix is in a suitable format for manipulation
+    if not isinstance(similarity_matrix, csr_matrix):
+        similarity_matrix = csr_matrix(similarity_matrix)
     # Get indices from dataframes
     filtered_df_indices = filtered_df.index.to_list()
     # Get the corresponding similarity values
     #top_values = match_matrix.data[linear_indices]
+    flat_data = match_matrix.data
+    # Get the indices that would sort the data array in descending order
+    sorted_indices = np.argsort(flat_data)[::-1]
+    # Take the first k indices to get the top k maximum values
+    top_indices = sorted_indices[:top_x]
     # Convert flat indices to 2D row and column indices
+    row_indices, col_indices = match_matrix.nonzero()
+    row_indices = row_indices[top_indices]
+    col_indices = col_indices[top_indices]
+    # Get the values corresponding to the top k indices
+    top_values = flat_data[top_indices]
     # Get the values corresponding to the top k indices
     top_values = match_matrix[row_indices, col_indices]