Spaces:

GIZ
/

Development-Project-Synergy-Finder

Running on CPU Upgrade

File size: 1,836 Bytes

f3a1940
 
5f41368
e4ce8fe
f3a1940
e4ce8fe
f17e764
 
3d9250a
 
f17e764
cce39ff
 
f17e764
8250706
2eaf511
 
 
 
5ca912a
8250706
 
 
 
 
 
 
 
 
 
 
 
 
6b7c543
 
8250706
 
 
6b7c543
 
8250706
 
 
 
 
6b7c543
 
d7f99ce
6b7c543
 
6cad12f
f3a1940
6cad12f

import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix
import streamlit as st

# multi_project_matching
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
    """Return the ``top_x`` highest-scoring project/candidate pairs.

    The rows of ``similarity_matrix`` are selected with ``project_df.index``
    and the columns with ``filtered_df.index``; the resulting sub-matrix is
    ranked by value and the best ``top_x`` cells are reported.

    NOTE(review): the index values of both dataframes are used both as
    positions into ``similarity_matrix`` and as ``.loc`` labels, so this
    presumably expects unique integer indices that line up with the matrix
    rows/columns -- confirm against the caller.

    Args:
        filtered_df: DataFrame whose index selects the matrix columns.
        project_df: DataFrame whose index selects the matrix rows.
        similarity_matrix: Pairwise similarity scores; converted with
            ``csr_matrix(...)`` when it is not already a ``csr_matrix``.
        top_x: Maximum number of pairs to return. Values below 1 yield empty
            results; values above the number of available pairs return all
            pairs.

    Returns:
        A tuple ``(p1_df, p2_df)``. ``p1_df`` holds the matched rows of
        ``filtered_df`` and ``p2_df`` the matched rows of ``project_df``, in
        descending similarity order; row ``i`` of both frames describes the
        same pair. Each frame is a copy with an added ``'similarity'`` column.

    Side effects:
        Writes the sub-matrix shape and the list of
        ``(value, project_index, filtered_index)`` tuples to the Streamlit
        page and prints a completion message to stdout.
    """
    # Normalise the input so the row/column selection below works for any
    # input that csr_matrix() accepts.
    if not isinstance(similarity_matrix, csr_matrix):
        similarity_matrix = csr_matrix(similarity_matrix)

    filtered_indices = filtered_df.index.to_list()
    project_indices = project_df.index.to_list()

    # Rows = projects, columns = filtered candidates.
    match_matrix = similarity_matrix[project_indices, :][:, filtered_indices]
    dense_match_matrix = match_matrix.toarray()

    st.write(dense_match_matrix.shape)

    flat_scores = dense_match_matrix.ravel()

    # Clamp the requested count. Slicing with ``[-0:]`` would select *every*
    # element, and a negative count would turn into a positive start offset,
    # so both cases must be handled explicitly instead of being sliced.
    n_top = max(0, min(top_x, flat_scores.size))

    if n_top == 0:
        top_flat_idx = np.empty(0, dtype=np.intp)
        row_positions = top_flat_idx
        col_positions = top_flat_idx
    else:
        # Ascending sort -> take the tail -> reverse for descending order.
        top_flat_idx = np.argsort(flat_scores)[-n_top:][::-1]
        # Map flat positions back to (row, column) positions in the sub-matrix.
        row_positions, col_positions = np.unravel_index(
            top_flat_idx, dense_match_matrix.shape
        )

    top_values = flat_scores[top_flat_idx]

    # Translate sub-matrix positions back to the original dataframe indices.
    org_rows = [project_indices[row] for row in row_positions]
    org_cols = [filtered_indices[col] for col in col_positions]
    top_matches = list(zip(top_values, org_rows, org_cols))

    st.write(top_matches)

    p1_df = filtered_df.loc[org_cols].copy()
    p1_df['similarity'] = top_values

    p2_df = project_df.loc[org_rows].copy()
    p2_df['similarity'] = top_values
    print("finished calc matches")

    return p1_df, p2_df