File size: 2,774 Bytes
f3a1940 f17e764 e4ce8fe f3a1940 29fd9ee f3a1940 1081227 f3a1940 1081227 29fd9ee e4ce8fe f17e764 e4ce8fe f17e764 b8028b3 f17e764 dda6e4a f17e764 d7f99ce c8d1d9b f17e764 d7f99ce c8d1d9b f17e764 d7f99ce 6cad12f f3a1940 6cad12f f3a1940 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, lil_matrix
import streamlit as st
"""
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
# the matching project (project2) can be any project
# rows (indices) = project1
# columns = project2
# -> find matches
# restrict the rows to the projects that passed the filter
filtered_df_indecies_list = filtered_df.index
project_df_indecies_list = project_df.index
np.fill_diagonal(similarity_matrix, 0)
match_matrix = similarity_matrix[filtered_df_indecies_list, :][:, project_df_indecies_list]
best_matches_list = np.argsort(match_matrix, axis=None)
if len(best_matches_list) < top_x:
top_x = len(best_matches_list)
# get row (project1) and column (project2) with highest similarity in filtered df
top_indices = np.unravel_index(best_matches_list[-top_x:], match_matrix.shape)
# get the corresponding similarity values
top_values = match_matrix[top_indices]
p1_df = filtered_df.iloc[top_indices[0]]
p1_df["similarity"] = top_values
p2_df = project_df.iloc[top_indices[1]]
p2_df["similarity"] = top_values
return p1_df, p2_df
"""
# multi_project_matching
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
    """Return the ``top_x`` most similar (project1, project2) pairs.

    Rows of ``similarity_matrix`` are selected with the index labels of
    ``filtered_df`` and columns with the index labels of ``project_df``, so
    both indexes are used as integer positions into the matrix.
    Only the entries explicitly stored in the sparse submatrix are ranked;
    if fewer than ``top_x`` entries are stored, fewer pairs are returned.

    Besides returning the result, the function writes the input shapes, the
    selected positions and both result frames to the Streamlit page and
    prints a completion message to stdout.

    Args:
        filtered_df: DataFrame whose rows are the candidates for project1.
        project_df: DataFrame whose rows are the candidates for project2.
        similarity_matrix: Dense array or scipy sparse matrix of pairwise
            similarities; converted to CSR if it is not CSR already.
        top_x: Maximum number of pairs to return.

    Returns:
        A tuple ``(p1_df, p2_df)`` of copies of rows taken from
        ``filtered_df`` and ``project_df``. Row ``k`` of both frames
        describes the same pair, and both carry a ``similarity`` column.
        Pairs are ordered by ascending similarity (best match last).
    """
    # NOTE(review): self-matches (a project paired with itself) are not
    # excluded here - confirm the caller zeroes the diagonal if needed.
    st.write(filtered_df.shape)
    st.write(project_df.shape)
    st.write(similarity_matrix.shape)

    # Normalise to CSR so dense arrays and other sparse formats are handled
    # the same way below.
    if not isinstance(similarity_matrix, csr_matrix):
        similarity_matrix = csr_matrix(similarity_matrix)

    filtered_df_indices = filtered_df.index.to_list()
    project_df_indices = project_df.index.to_list()

    # Submatrix: row i <-> filtered_df.iloc[i], column j <-> project_df.iloc[j].
    match_matrix = similarity_matrix[filtered_df_indices, :][:, project_df_indices]
    st.write(match_matrix.shape)

    # COO exposes aligned (row, col, data) triplets for the stored entries.
    # Positions returned by argsort over ``data`` index those triplets; they
    # are NOT flat indices into the dense shape, so np.unravel_index must not
    # be applied to them.
    coo = match_matrix.tocoo()
    stored = coo.data.size

    # Clamp before slicing: a plain ``[-top_x:]`` would return every entry
    # for top_x == 0.
    top_x = max(0, min(top_x, stored))
    order = np.argsort(coo.data, kind="stable")[stored - top_x:]

    top_indices = (coo.row[order], coo.col[order])
    top_values = coo.data[order]
    st.write(top_indices)

    # The positions are relative to the submatrix, i.e. positional within
    # each dataframe, hence .iloc rather than label-based .loc.
    p1_df = filtered_df.iloc[top_indices[0]].copy()
    p1_df['similarity'] = top_values
    st.dataframe(p1_df)

    p2_df = project_df.iloc[top_indices[1]].copy()
    p2_df['similarity'] = top_values
    st.dataframe(p2_df)

    print("finished calc matches")
    return p1_df, p2_df
|