Jan Mühlnikel
committed on
Commit
·
cee7d6e
1
Parent(s):
59435af
return to commit 82f1167e2b824e8eed9a8c440714d67efd84726b
Browse files
- app.py +0 -1
- functions/filter_projects.py +3 -8
- modules/navbar.py +1 -0
- requirements.txt +1 -2
- similarity_page.py +4 -10
- src/embeddings.pkl +3 -0
- src/extended_similarities.npz +2 -2
- src/extended_similarities_nonsimorga.npz +2 -2
- src/projects/project_region.csv +1 -1
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
# PAGE CONFIG
|
4 |
st.set_page_config(
|
5 |
page_title='Development Banks Collaboration Analyzer',
|
|
|
1 |
import streamlit as st
|
|
|
2 |
# PAGE CONFIG
|
3 |
st.set_page_config(
|
4 |
page_title='Development Banks Collaboration Analyzer',
|
functions/filter_projects.py
CHANGED
@@ -5,13 +5,9 @@ def contains_code(crs_codes, code_list):
|
|
5 |
codes = str(crs_codes).split(';')
|
6 |
return any(code in code_list for code in codes)
|
7 |
|
8 |
-
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
|
9 |
-
#query,
|
10 |
-
model,
|
11 |
-
#embeddings,
|
12 |
-
TOP_X_PROJECTS=30):
|
13 |
# Check if filters where not all should be selected are empty
|
14 |
-
if crs3_list != [] or crs5_list != [] or sdg_str != ""
|
15 |
|
16 |
# FILTER CRS
|
17 |
if crs3_list and not crs5_list:
|
@@ -39,12 +35,11 @@ def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_c
|
|
39 |
df = df[df['orga_abbreviation'].isin(orga_code_list)]
|
40 |
|
41 |
# FILTER QUERY
|
42 |
-
"""
|
43 |
if query != "" and len(df) > 0:
|
44 |
if len(df) < TOP_X_PROJECTS:
|
45 |
TOP_X_PROJECTS = len(df)
|
46 |
df = search(query, model, embeddings, df, TOP_X_PROJECTS)
|
47 |
-
|
48 |
|
49 |
|
50 |
return df
|
|
|
5 |
codes = str(crs_codes).split(';')
|
6 |
return any(code in code_list for code in codes)
|
7 |
|
8 |
+
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
|
|
|
|
|
|
|
|
|
9 |
# Check if filters where not all should be selected are empty
|
10 |
+
if crs3_list != [] or crs5_list != [] or sdg_str != "" or query != "":
|
11 |
|
12 |
# FILTER CRS
|
13 |
if crs3_list and not crs5_list:
|
|
|
35 |
df = df[df['orga_abbreviation'].isin(orga_code_list)]
|
36 |
|
37 |
# FILTER QUERY
|
|
|
38 |
if query != "" and len(df) > 0:
|
39 |
if len(df) < TOP_X_PROJECTS:
|
40 |
TOP_X_PROJECTS = len(df)
|
41 |
df = search(query, model, embeddings, df, TOP_X_PROJECTS)
|
42 |
+
|
43 |
|
44 |
|
45 |
return df
|
modules/navbar.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
import similarity_page
|
3 |
|
4 |
# giz-dsc colors
|
|
|
1 |
import streamlit as st
|
2 |
+
from streamlit_option_menu import option_menu # https://github.com/victoryhb/streamlit-option-menu
|
3 |
import similarity_page
|
4 |
|
5 |
# giz-dsc colors
|
requirements.txt
CHANGED
@@ -6,5 +6,4 @@ scipy==1.12.0
|
|
6 |
faiss-cpu==1.8.0
|
7 |
faiss-gpu==1.7.2
|
8 |
sentence-transformers==2.5.1
|
9 |
-
streamlit-aggrid==0.3.4
|
10 |
-
psutil==5.9.0
|
|
|
6 |
faiss-cpu==1.8.0
|
7 |
faiss-gpu==1.7.2
|
8 |
sentence-transformers==2.5.1
|
9 |
+
streamlit-aggrid==0.3.4.
|
|
similarity_page.py
CHANGED
@@ -111,7 +111,6 @@ def load_model():
|
|
111 |
return model
|
112 |
|
113 |
# Load Embeddings
|
114 |
-
"""
|
115 |
@st.cache_data
|
116 |
def load_embeddings_and_index():
|
117 |
# Load embeddings
|
@@ -120,7 +119,7 @@ def load_embeddings_and_index():
|
|
120 |
embeddings = stored_data["embeddings"]
|
121 |
|
122 |
return embeddings
|
123 |
-
|
124 |
|
125 |
# USE CACHE FUNCTIONS
|
126 |
sim_matrix = load_sim_matrix()
|
@@ -135,7 +134,7 @@ COUNTRY_OPTION_LIST = getCountry()
|
|
135 |
|
136 |
# LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
|
137 |
model = load_model()
|
138 |
-
|
139 |
|
140 |
def show_multi_matching_page():
|
141 |
#st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
@@ -199,7 +198,7 @@ def show_multi_matching_page():
|
|
199 |
)
|
200 |
|
201 |
# SEARCH BOX
|
202 |
-
|
203 |
|
204 |
with col3:
|
205 |
# COUNTRY SELECTION
|
@@ -243,11 +242,7 @@ def show_multi_matching_page():
|
|
243 |
|
244 |
# FILTER DF WITH SELECTED FILTER OPTIONS
|
245 |
TOP_X_PROJECTS = 30
|
246 |
-
filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
|
247 |
-
#query,
|
248 |
-
model,
|
249 |
-
#embeddings,
|
250 |
-
TOP_X_PROJECTS)
|
251 |
if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
|
252 |
# FIND MATCHES
|
253 |
## If only same country checkbox i sactivated
|
@@ -317,7 +312,6 @@ def show_single_matching_page():
|
|
317 |
else:
|
318 |
search_list = title_search_list
|
319 |
|
320 |
-
|
321 |
project_option = st.selectbox(
|
322 |
label = 'Search for a project',
|
323 |
index = None,
|
|
|
111 |
return model
|
112 |
|
113 |
# Load Embeddings
|
|
|
114 |
@st.cache_data
|
115 |
def load_embeddings_and_index():
|
116 |
# Load embeddings
|
|
|
119 |
embeddings = stored_data["embeddings"]
|
120 |
|
121 |
return embeddings
|
122 |
+
|
123 |
|
124 |
# USE CACHE FUNCTIONS
|
125 |
sim_matrix = load_sim_matrix()
|
|
|
134 |
|
135 |
# LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
|
136 |
model = load_model()
|
137 |
+
embeddings = load_embeddings_and_index()
|
138 |
|
139 |
def show_multi_matching_page():
|
140 |
#st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
|
|
198 |
)
|
199 |
|
200 |
# SEARCH BOX
|
201 |
+
query = st.text_input("Search Query")
|
202 |
|
203 |
with col3:
|
204 |
# COUNTRY SELECTION
|
|
|
242 |
|
243 |
# FILTER DF WITH SELECTED FILTER OPTIONS
|
244 |
TOP_X_PROJECTS = 30
|
245 |
+
filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS)
|
|
|
|
|
|
|
|
|
246 |
if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
|
247 |
# FIND MATCHES
|
248 |
## If only same country checkbox i sactivated
|
|
|
312 |
else:
|
313 |
search_list = title_search_list
|
314 |
|
|
|
315 |
project_option = st.selectbox(
|
316 |
label = 'Search for a project',
|
317 |
index = None,
|
src/embeddings.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c9bce42b5bef1adebd5b8e157c7dc3197d75c488931960dd8aa736329c024b1
|
3 |
+
size 67450241
|
src/extended_similarities.npz
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ae835bcd73475fdfc959b9f3682f0efa33240550a9405fb4e48638ea2e3175a
|
3 |
+
size 38542579
|
src/extended_similarities_nonsimorga.npz
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7737f324bf8e6998d1c761b92266d3677bac052a4db1872ecd3e3ba1d920913
|
3 |
+
size 27134286
|
src/projects/project_region.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2788348
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09ff7b7aefbd3cf3a0f651b8120609d755ca7b8abc0b4026170385a5b8b903f9
|
3 |
size 2788348
|