Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

App Files Files Community

Jan Mühlnikel committed on May 26, 2024

Commit

00341f5

1 Parent(s): 2080e6b

enhanced documentation

Browse files

Files changed (3) hide show

functions/different_orga_filter.py +0 -12
functions/semantic_search.py +3 -5
modules/singlematch_result_table.py +12 -7

functions/different_orga_filter.py DELETED Viewed

@@ -1,12 +0,0 @@
-import pandas as pd
-def different_orga_filter(df, orga):
-    # FILTER COUNTRY
-        country_filtered_df = pd.DataFrame()
-        for c in country_code_list:
-            c_df = df[df["country"].str.contains(c, na=False)]
-            country_filtered_df = pd.concat([country_filtered_df, c_df], ignore_index=False)
-        df = country_filtered_df
-        return country_filtered_df

functions/semantic_search.py CHANGED Viewed

@@ -1,9 +1,8 @@
-import pickle
 import faiss
-import streamlit as st
-from sentence_transformers import SentenceTransformer
-import pandas as pd
 def search(query, model, embeddings, filtered_df, top_x=20):
         filtered_df_indecies_list = filtered_df.index
@@ -21,7 +20,6 @@ def search(query, model, embeddings, filtered_df, top_x=20):
         D, I = faiss_index.search(query_embedding, k=top_x)  # Search for top x similar items
         # Extract the sentences corresponding to the top indices
-        #print(filtered_df.columns())
         top_indecies = [i for i in I[0]]
         return filtered_df.iloc[top_indecies]

 import faiss
+"""
+Semantic Search Function
+"""
 def search(query, model, embeddings, filtered_df, top_x=20):
         filtered_df_indecies_list = filtered_df.index
         D, I = faiss_index.search(query_embedding, k=top_x)  # Search for top x similar items
         # Extract the sentences corresponding to the top indices
         top_indecies = [i for i in I[0]]
         return filtered_df.iloc[top_indecies]

modules/singlematch_result_table.py CHANGED Viewed

@@ -1,8 +1,17 @@
 import streamlit as st
-import pandas as pd
 def show_single_table(selected_project_index, projects_df, result_df):
     result_df['crs_3_code_list'] = result_df['crs_3_name'].apply(
     lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";")[:-1])
     )
@@ -25,6 +34,7 @@ def show_single_table(selected_project_index, projects_df, result_df):
     lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";"))
     )
     st.subheader("Reference Project")
     st.dataframe(
                     sel_p_row[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country_name", "country_flag", "sdg_list", "crs_3_code_list", "crs_5_code_list"]],
@@ -92,6 +102,7 @@ def show_single_table(selected_project_index, projects_df, result_df):
                 )
     if len(result_df) == 0:
         st.write("No results found!")
     else:
@@ -112,12 +123,6 @@ def show_single_table(selected_project_index, projects_df, result_df):
                     min_value=0,
                     max_value=100,
                 ),
-                #"similarity": st.column_config.TextColumn(
-                #    "Similarity",
-                #    help="Similarity",
-                #    disabled=True,
-                #    width="small"
-                #),
                 "iati_id": st.column_config.TextColumn(
                     "IATI ID",
                     help="IATI Project ID",

 import streamlit as st
+"""
+Result table of the Single Project Matching
+"""
 def show_single_table(selected_project_index, projects_df, result_df):
+    """
+    TODO: Add this to preprocessing
+    """
     result_df['crs_3_code_list'] = result_df['crs_3_name'].apply(
     lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";")[:-1])
     )
     lambda x: [""] if x is None else (str(x).split(";")[:-1] if str(x).endswith(";") else str(x).split(";"))
     )
+    # Display the selected project and its info
     st.subheader("Reference Project")
     st.dataframe(
                     sel_p_row[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country_name", "country_flag", "sdg_list", "crs_3_code_list", "crs_5_code_list"]],
                 )
+    # Display the projects similar to the selected project
     if len(result_df) == 0:
         st.write("No results found!")
     else:
                     min_value=0,
                     max_value=100,
                 ),
                 "iati_id": st.column_config.TextColumn(
                     "IATI ID",
                     help="IATI Project ID",