Jan Mühlnikel
committed on
Commit
·
2a6aea4
1
Parent(s):
b0c3715
experiment
Browse files
- functions/calc_matches.py +2 -35
- similarity_page.py +2 -2
functions/calc_matches.py
CHANGED
@@ -3,42 +3,8 @@ import numpy as np
|
|
3 |
from scipy.sparse import csr_matrix, lil_matrix
|
4 |
import streamlit as st
|
5 |
|
6 |
-
"""
|
7 |
-
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
8 |
-
# matching project2 can be any project
|
9 |
-
# indecies (rows) = project1
|
10 |
-
# columns = project2
|
11 |
-
# -> find matches
|
12 |
-
|
13 |
-
# filter out all row considering the filter
|
14 |
-
filtered_df_indecies_list = filtered_df.index
|
15 |
-
project_df_indecies_list = project_df.index
|
16 |
-
|
17 |
-
np.fill_diagonal(similarity_matrix, 0)
|
18 |
-
match_matrix = similarity_matrix[filtered_df_indecies_list, :][:, project_df_indecies_list]
|
19 |
-
|
20 |
-
best_matches_list = np.argsort(match_matrix, axis=None)
|
21 |
-
|
22 |
-
if len(best_matches_list) < top_x:
|
23 |
-
top_x = len(best_matches_list)
|
24 |
-
|
25 |
-
# get row (project1) and column (project2) with highest similarity in filtered df
|
26 |
-
top_indices = np.unravel_index(best_matches_list[-top_x:], match_matrix.shape)
|
27 |
-
|
28 |
-
# get the corresponding similarity values
|
29 |
-
top_values = match_matrix[top_indices]
|
30 |
-
|
31 |
-
p1_df = filtered_df.iloc[top_indices[0]]
|
32 |
-
p1_df["similarity"] = top_values
|
33 |
-
p2_df = project_df.iloc[top_indices[1]]
|
34 |
-
p2_df["similarity"] = top_values
|
35 |
-
|
36 |
-
return p1_df, p2_df
|
37 |
-
"""
|
38 |
-
|
39 |
# multi_project_matching
|
40 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
41 |
-
st.dataframe(project_df.head(5))
|
42 |
st.write(filtered_df.shape)
|
43 |
st.write(project_df.shape)
|
44 |
st.write(similarity_matrix.shape)
|
@@ -56,7 +22,8 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
56 |
project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
|
57 |
|
58 |
# Select submatrix based on indices from both dataframes
|
59 |
-
match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
|
|
|
60 |
|
61 |
st.write(match_matrix.shape)
|
62 |
|
|
|
3 |
from scipy.sparse import csr_matrix, lil_matrix
|
4 |
import streamlit as st
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
# multi_project_matching
|
7 |
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
|
8 |
st.write(filtered_df.shape)
|
9 |
st.write(project_df.shape)
|
10 |
st.write(similarity_matrix.shape)
|
|
|
22 |
project_df_index_map = {i: index for i, index in enumerate(project_df_indices)}
|
23 |
|
24 |
# Select submatrix based on indices from both dataframes
|
25 |
+
#match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
|
26 |
+
match_matrix = similarity_matrix[np.ix_(filtered_df_indices, project_df_indices)]
|
27 |
|
28 |
st.write(match_matrix.shape)
|
29 |
|
similarity_page.py
CHANGED
@@ -272,10 +272,10 @@ def show_multi_matching_page():
|
|
272 |
## if show only different orgas checkbox is activated
|
273 |
if different_orga_checkbox:
|
274 |
with st.spinner('Please wait...'):
|
275 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
276 |
else:
|
277 |
with st.spinner('Please wait...'):
|
278 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
279 |
|
280 |
# SHOW THE RESULT
|
281 |
show_multi_table(p1_df, p2_df)
|
|
|
272 |
## if show only different orgas checkbox is activated
|
273 |
if different_orga_checkbox:
|
274 |
with st.spinner('Please wait...'):
|
275 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
|
276 |
else:
|
277 |
with st.spinner('Please wait...'):
|
278 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
|
279 |
|
280 |
# SHOW THE RESULT
|
281 |
show_multi_table(p1_df, p2_df)
|