Jan Mühlnikel committed on
Commit
a88ee3f
·
1 Parent(s): 2a6aea4

experiment

Browse files
Files changed (1) hide show
  1. functions/calc_matches.py +16 -9
functions/calc_matches.py CHANGED
@@ -22,25 +22,32 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
22
  project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
23
 
24
  # Select submatrix based on indices from both dataframes
25
- #match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
26
- match_matrix = similarity_matrix[np.ix_(filtered_df_indices, project_df_indices)]
27
 
28
  st.write(match_matrix.shape)
29
 
30
  # Get the linear indices of the top 'top_x' values
31
  # (flattened index to handle the sparse matrix more effectively)
32
- linear_indices = np.argsort(match_matrix.data)[-top_x:]
33
- if len(linear_indices) < top_x:
34
- top_x = len(linear_indices)
35
 
36
  # Convert flat indices to 2D indices using the shape of the submatrix
37
- top_indices = np.unravel_index(linear_indices, match_matrix.shape)
38
 
39
  # Get the corresponding similarity values
40
- top_values = match_matrix.data[linear_indices]
41
 
42
- top_filtered_df_indices = [filtered_df_index_map[i] for i in top_indices[0]]
43
- top_project_df_indices = [project_df_index_map[i] for i in top_indices[1]]
 
 
 
 
 
 
 
 
44
 
45
  st.write(top_filtered_df_indices)
46
 
 
22
  project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
23
 
24
  # Select submatrix based on indices from both dataframes
25
+ match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
 
26
 
27
  st.write(match_matrix.shape)
28
 
29
  # Get the linear indices of the top 'top_x' values
30
  # (flattened index to handle the sparse matrix more effectively)
31
+ #linear_indices = np.argsort(match_matrix.data)[-top_x:]
32
+ #if len(linear_indices) < top_x:
33
+ # top_x = len(linear_indices)
34
 
35
  # Convert flat indices to 2D indices using the shape of the submatrix
36
+ #top_indices = np.unravel_index(linear_indices, match_matrix.shape)
37
 
38
  # Get the corresponding similarity values
39
+ #top_values = match_matrix.data[linear_indices]
40
 
41
+ flat_indices = np.argpartition(match_matrix.flatten(), -3)[-3:]
42
+
43
+ # Convert flat indices to 2D row and column indices
44
+ row_indices, col_indices = np.unravel_index(flat_indices, match_matrix.shape)
45
+
46
+ # Get the values corresponding to the top k indices
47
+ top_values = match_matrix[row_indices, col_indices]
48
+
49
+ top_filtered_df_indices = [filtered_df_index_map[i] for i in col_indices]
50
+ top_project_df_indices = [project_df_index_map[i] for i in row_indices]
51
 
52
  st.write(top_filtered_df_indices)
53