|
import pandas as pd |
|
from functions.semantic_search import search |
|
|
|
def contains_code(crs_codes, code_list):
    """Return True if any ``;``-separated code in ``crs_codes`` is in ``code_list``.

    Args:
        crs_codes: Cell value holding one or more codes separated by ``;``.
            It is converted with ``str()`` first, so non-string values
            (numbers, ``None``, NaN) are accepted and simply compared by
            their string form.
        code_list: Iterable of codes to look for.

    Returns:
        bool: True when at least one code of ``crs_codes`` occurs in
        ``code_list``, otherwise False.
    """
    # Compare on the stripped string form on both sides so that stray
    # whitespace around the separator ("110; 120") or a numeric entry in
    # ``code_list`` does not silently prevent a match.
    wanted = {str(code).strip() for code in code_list}
    return any(part.strip() in wanted for part in str(crs_codes).split(';'))
|
|
|
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Filter ``df`` by the selected criteria and optionally run ``search`` on the rest.

    Filters are applied in this order: CRS codes, SDG, country, organisation,
    then the query-based search. Each filter is skipped when its argument is
    empty.

    Args:
        df: DataFrame to filter. Depending on the active filters it must have
            the columns ``crs_3_code``, ``crs_5_code``, ``sgd_pred_code``,
            ``country`` and ``orga_abbreviation``.
        crs3_list: CRS-3 codes to keep; only used when ``crs5_list`` is empty.
        crs5_list: CRS-5 codes to keep; takes precedence over ``crs3_list``.
        sdg_str: SDG number as a string (converted with ``int``), or ``""``.
        country_code_list: Country codes; a row is kept when its ``country``
            value contains at least one of them as a substring.
        orga_code_list: Allowed values of ``orga_abbreviation``.
        query: Free-text query handed to ``search``, or ``""``.
        model: Passed through to ``search`` unchanged.
        embeddings: Passed through to ``search`` unchanged.
        TOP_X_PROJECTS: Upper bound on the result count requested from
            ``search``; capped at the number of rows left after filtering.

    Returns:
        The filtered DataFrame, or whatever ``search`` returns when a query is
        given and rows remain. When none of ``crs3_list``, ``crs5_list``,
        ``sdg_str`` and ``query`` is set, ``df`` is returned unfiltered.

    Raises:
        ValueError: If ``sdg_str`` is non-empty but not an integer literal.
    """
    # NOTE(review): the country and organisation filters alone do not trigger
    # any filtering; at least one of CRS / SDG / query must be set. This keeps
    # the original guard condition - confirm that this is intended.
    if not (crs3_list or crs5_list or sdg_str or query):
        return df

    # CRS: the more specific 5-digit selection wins whenever it is present;
    # the 3-digit selection is only used on its own.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda codes: contains_code(codes, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda codes: contains_code(codes, crs3_list))]

    # SDG
    if sdg_str:
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # Country: OR all codes into one mask so that a row matching several
    # selected countries is kept exactly once and the row order is preserved.
    if country_code_list:
        country_mask = pd.Series(False, index=df.index)
        for country_code in country_code_list:
            # regex=False: codes are literal substrings, not patterns.
            country_mask |= df["country"].str.contains(country_code, regex=False, na=False)
        df = df[country_mask]

    # Organisation
    if orga_code_list:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    # Query-based search on whatever is left; never ask for more results than
    # there are rows.
    if query and len(df) > 0:
        top_k = min(TOP_X_PROJECTS, len(df))
        df = search(query, model, embeddings, df, top_k)

    return df
|
|
|
|
|
|
|
|
|
|