Jan Mühlnikel
committed on
Commit
·
8250706
1
Parent(s):
2eaf511
experiment
Browse files
- functions/calc_matches.py +22 -5
functions/calc_matches.py
CHANGED
@@ -5,10 +5,6 @@ import streamlit as st
|
|
5 |
|
6 |
# multi_project_matching
|
7 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
8 |
-
st.write(filtered_df.shape)
|
9 |
-
st.write(project_df.shape)
|
10 |
-
st.write(similarity_matrix.shape)
|
11 |
-
|
12 |
# Ensure the matrix is in a suitable format for manipulation
|
13 |
if not isinstance(similarity_matrix, csr_matrix):
|
14 |
similarity_matrix = csr_matrix(similarity_matrix)
|
@@ -16,12 +12,33 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
16 |
filtered_indices = filtered_df.index.to_list()
|
17 |
project_indices = project_df.index.to_list()
|
18 |
|
19 |
-
match_matrix = similarity_matrix[project_indices, :][:, filtered_indices]
|
20 |
|
21 |
dense_match_matrix = match_matrix.toarray()
|
22 |
|
23 |
st.write(dense_match_matrix.shape)
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
"""
|
26 |
p1_df = filtered_df.loc[top_col_indices].copy()
|
27 |
p1_df['similarity'] = top_values
|
|
|
5 |
|
6 |
# multi_project_matching
|
7 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
|
|
|
|
|
|
|
8 |
# Ensure the matrix is in a suitable format for manipulation
|
9 |
if not isinstance(similarity_matrix, csr_matrix):
|
10 |
similarity_matrix = csr_matrix(similarity_matrix)
|
|
|
12 |
filtered_indices = filtered_df.index.to_list()
|
13 |
project_indices = project_df.index.to_list()
|
14 |
|
15 |
+
match_matrix = similarity_matrix[project_indices, :][:, filtered_indices] # row / column
|
16 |
|
17 |
dense_match_matrix = match_matrix.toarray()
|
18 |
|
19 |
st.write(dense_match_matrix.shape)
|
20 |
|
21 |
+
flat_matrix = dense_match_matrix.flatten()
|
22 |
+
|
23 |
+
# Get the indices of the top 15 values in the flattened matrix
|
24 |
+
top_15_indices = np.argsort(flat_matrix)[-top_x:][::-1]
|
25 |
+
|
26 |
+
# Convert flat indices back to 2D indices
|
27 |
+
top_15_2d_indices = np.unravel_index(top_15_indices, dense_match_matrix.shape)
|
28 |
+
|
29 |
+
# Extract the corresponding values
|
30 |
+
top_15_values = flat_matrix[top_15_indices]
|
31 |
+
|
32 |
+
# Prepare the result with row and column indices from original dataframes
|
33 |
+
top_15_matches = []
|
34 |
+
for value, row, col in zip(top_15_values, top_15_2d_indices[0], top_15_2d_indices[1]):
|
35 |
+
original_row_index = project_indices[row]
|
36 |
+
original_col_index = filtered_indices[col]
|
37 |
+
top_15_matches.append((value, original_row_index, original_col_index))
|
38 |
+
|
39 |
+
st.write(top_15_matches)
|
40 |
+
|
41 |
+
|
42 |
"""
|
43 |
p1_df = filtered_df.loc[top_col_indices].copy()
|
44 |
p1_df['similarity'] = top_values
|