Jan Mühlnikel committed on
Commit
8250706
·
1 Parent(s): 2eaf511

experiment

Browse files
Files changed (1) hide show
  1. functions/calc_matches.py +22 -5
functions/calc_matches.py CHANGED
@@ -5,10 +5,6 @@ import streamlit as st
5
 
6
  # multi_project_matching
7
  def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
8
- st.write(filtered_df.shape)
9
- st.write(project_df.shape)
10
- st.write(similarity_matrix.shape)
11
-
12
  # Ensure the matrix is in a suitable format for manipulation
13
  if not isinstance(similarity_matrix, csr_matrix):
14
  similarity_matrix = csr_matrix(similarity_matrix)
@@ -16,12 +12,33 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
16
  filtered_indices = filtered_df.index.to_list()
17
  project_indices = project_df.index.to_list()
18
 
19
- match_matrix = similarity_matrix[project_indices, :][:, filtered_indices]
20
 
21
  dense_match_matrix = match_matrix.toarray()
22
 
23
  st.write(dense_match_matrix.shape)
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  """
26
  p1_df = filtered_df.loc[top_col_indices].copy()
27
  p1_df['similarity'] = top_values
 
5
 
6
  # multi_project_matching
7
  def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
 
 
 
 
8
  # Ensure the matrix is in a suitable format for manipulation
9
  if not isinstance(similarity_matrix, csr_matrix):
10
  similarity_matrix = csr_matrix(similarity_matrix)
 
12
  filtered_indices = filtered_df.index.to_list()
13
  project_indices = project_df.index.to_list()
14
 
15
+ match_matrix = similarity_matrix[project_indices, :][:, filtered_indices] # row / column
16
 
17
  dense_match_matrix = match_matrix.toarray()
18
 
19
  st.write(dense_match_matrix.shape)
20
 
21
+ flat_matrix = dense_match_matrix.flatten()
22
+
23
+ # Get the indices of the top_x largest values in the flattened matrix
24
+ top_15_indices = np.argsort(flat_matrix)[-top_x:][::-1]
25
+
26
+ # Convert flat indices back to 2D indices
27
+ top_15_2d_indices = np.unravel_index(top_15_indices, dense_match_matrix.shape)
28
+
29
+ # Extract the corresponding values
30
+ top_15_values = flat_matrix[top_15_indices]
31
+
32
+ # Prepare the result with row and column indices from original dataframes
33
+ top_15_matches = []
34
+ for value, row, col in zip(top_15_values, top_15_2d_indices[0], top_15_2d_indices[1]):
35
+ original_row_index = project_indices[row]
36
+ original_col_index = filtered_indices[col]
37
+ top_15_matches.append((value, original_row_index, original_col_index))
38
+
39
+ st.write(top_15_matches)
40
+
41
+
42
  """
43
  p1_df = filtered_df.loc[top_col_indices].copy()
44
  p1_df['similarity'] = top_values