Jan Mühlnikel committed on
Commit
c094750
·
1 Parent(s): 889a1b1
Files changed (2) hide show
  1. functions/single_similar.py +17 -3
  2. similarity_page.py +1 -0
functions/single_similar.py CHANGED
@@ -2,13 +2,27 @@ import pandas as pd
2
  import numpy as np
3
 
4
  def find_similar(p_index, similarity_matrix, filtered_df, top_x):
5
- selected_row = similarity_matrix[p_index]
 
6
  filtered_indices = filtered_df.index.tolist()
7
  print(filtered_indices)
8
 
9
- index_position_mapping = {index: position for position, index in enumerate(filtered_indices)}
10
  print(index_position_mapping)
11
 
12
- return "top_projects_df"
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
 
2
  import numpy as np
3
 
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
    """Return the rows of ``filtered_df`` most similar to one project.

    The index labels of ``filtered_df`` are used as column numbers into
    ``similarity_matrix``, and ``p_index`` as its row number.
    NOTE(review): this assumes ``similarity_matrix`` is a dense 2-D array
    (or something ``np.asarray`` can convert) and that the frame's index
    labels are integer positions in that matrix -- confirm against the
    code that builds the matrix.

    No row is excluded: if ``p_index`` is itself one of the labels in
    ``filtered_df`` it is ranked like any other row.

    Args:
        p_index: Row of ``similarity_matrix`` belonging to the selected
            project.
        similarity_matrix: Pairwise similarity scores.
        filtered_df: Candidate projects; only these rows can be returned.
        top_x: Maximum number of rows to return. Values <= 0 give an
            empty result.

    Returns:
        A new DataFrame holding at most ``top_x`` rows of ``filtered_df``
        ordered by descending similarity, with the score stored in an
        added ``"similarity"`` column. ``filtered_df`` is not modified.
    """
    # Index labels of the filtered frame double as matrix column numbers.
    columns = np.asarray(filtered_df.index, dtype=np.intp)

    # Read only the selected project's row; slicing all rows first would
    # copy the whole matrix just to throw most of it away.
    project_row = np.asarray(similarity_matrix)[p_index, columns]

    # argsort is ascending, so reverse it and keep the leading entries.
    # Clamp the limit: a negative slice bound would silently drop rows
    # from the tail instead of returning nothing.
    limit = max(int(top_x), 0)
    best_positions = np.argsort(project_row)[::-1][:limit]

    # ``best_positions`` are positions inside ``filtered_df`` (not index
    # labels), which is exactly what ``iloc`` expects. ``assign`` builds a
    # new frame, so the caller's DataFrame is never written to.
    return filtered_df.iloc[best_positions].assign(
        similarity=project_row[best_positions]
    )
27
 
28
 
similarity_page.py CHANGED
@@ -351,5 +351,6 @@ def show_single_matching_page():
351
  if isinstance(filtered_df_s, pd.DataFrame) and len(filtered_df_s) != 0:
352
 
353
  top_projects_df = find_similar(selected_project_index, sim_matrix, filtered_df_s, 10)
 
354
  #show_single_table(top_projects_df)
355
 
 
351
  if isinstance(filtered_df_s, pd.DataFrame) and len(filtered_df_s) != 0:
352
 
353
  top_projects_df = find_similar(selected_project_index, sim_matrix, filtered_df_s, 10)
354
+ st.dataframe(top_projects_df)
355
  #show_single_table(top_projects_df)
356