Jan Mühlnikel committed on
Commit
cee7d6e
·
1 Parent(s): 59435af
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import streamlit as st
2
-
3
  # PAGE CONFIG
4
  st.set_page_config(
5
  page_title='Development Banks Collaboration Analyzer',
 
1
  import streamlit as st
 
2
  # PAGE CONFIG
3
  st.set_page_config(
4
  page_title='Development Banks Collaboration Analyzer',
functions/filter_projects.py CHANGED
@@ -5,13 +5,9 @@ def contains_code(crs_codes, code_list):
5
  codes = str(crs_codes).split(';')
6
  return any(code in code_list for code in codes)
7
 
8
- def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
9
- #query,
10
- model,
11
- #embeddings,
12
- TOP_X_PROJECTS=30):
13
  # Check if filters where not all should be selected are empty
14
- if crs3_list != [] or crs5_list != [] or sdg_str != "": #or query != "":
15
 
16
  # FILTER CRS
17
  if crs3_list and not crs5_list:
@@ -39,12 +35,11 @@ def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_c
39
  df = df[df['orga_abbreviation'].isin(orga_code_list)]
40
 
41
  # FILTER QUERY
42
- """
43
  if query != "" and len(df) > 0:
44
  if len(df) < TOP_X_PROJECTS:
45
  TOP_X_PROJECTS = len(df)
46
  df = search(query, model, embeddings, df, TOP_X_PROJECTS)
47
- """
48
 
49
 
50
  return df
 
5
  codes = str(crs_codes).split(';')
6
  return any(code in code_list for code in codes)
7
 
8
+ def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
 
 
 
 
9
  # Check if filters where not all should be selected are empty
10
+ if crs3_list != [] or crs5_list != [] or sdg_str != "" or query != "":
11
 
12
  # FILTER CRS
13
  if crs3_list and not crs5_list:
 
35
  df = df[df['orga_abbreviation'].isin(orga_code_list)]
36
 
37
  # FILTER QUERY
 
38
  if query != "" and len(df) > 0:
39
  if len(df) < TOP_X_PROJECTS:
40
  TOP_X_PROJECTS = len(df)
41
  df = search(query, model, embeddings, df, TOP_X_PROJECTS)
42
+
43
 
44
 
45
  return df
modules/navbar.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  import similarity_page
3
 
4
  # giz-dsc colors
 
1
  import streamlit as st
2
+ from streamlit_option_menu import option_menu # https://github.com/victoryhb/streamlit-option-menu
3
  import similarity_page
4
 
5
  # giz-dsc colors
requirements.txt CHANGED
@@ -6,5 +6,4 @@ scipy==1.12.0
6
  faiss-cpu==1.8.0
7
  faiss-gpu==1.7.2
8
  sentence-transformers==2.5.1
9
- streamlit-aggrid==0.3.4
10
- psutil==5.9.0
 
6
  faiss-cpu==1.8.0
7
  faiss-gpu==1.7.2
8
  sentence-transformers==2.5.1
9
+ streamlit-aggrid==0.3.4.
 
similarity_page.py CHANGED
@@ -111,7 +111,6 @@ def load_model():
111
  return model
112
 
113
  # Load Embeddings
114
- """
115
  @st.cache_data
116
  def load_embeddings_and_index():
117
  # Load embeddings
@@ -120,7 +119,7 @@ def load_embeddings_and_index():
120
  embeddings = stored_data["embeddings"]
121
 
122
  return embeddings
123
- """
124
 
125
  # USE CACHE FUNCTIONS
126
  sim_matrix = load_sim_matrix()
@@ -135,7 +134,7 @@ COUNTRY_OPTION_LIST = getCountry()
135
 
136
  # LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
137
  model = load_model()
138
- #embeddings = load_embeddings_and_index()
139
 
140
  def show_multi_matching_page():
141
  #st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
@@ -199,7 +198,7 @@ def show_multi_matching_page():
199
  )
200
 
201
  # SEARCH BOX
202
- #query = st.text_input("Search Query")
203
 
204
  with col3:
205
  # COUNTRY SELECTION
@@ -243,11 +242,7 @@ def show_multi_matching_page():
243
 
244
  # FILTER DF WITH SELECTED FILTER OPTIONS
245
  TOP_X_PROJECTS = 30
246
- filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
247
- #query,
248
- model,
249
- #embeddings,
250
- TOP_X_PROJECTS)
251
  if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
252
  # FIND MATCHES
253
  ## If only same country checkbox i sactivated
@@ -317,7 +312,6 @@ def show_single_matching_page():
317
  else:
318
  search_list = title_search_list
319
 
320
-
321
  project_option = st.selectbox(
322
  label = 'Search for a project',
323
  index = None,
 
111
  return model
112
 
113
  # Load Embeddings
 
114
  @st.cache_data
115
  def load_embeddings_and_index():
116
  # Load embeddings
 
119
  embeddings = stored_data["embeddings"]
120
 
121
  return embeddings
122
+
123
 
124
  # USE CACHE FUNCTIONS
125
  sim_matrix = load_sim_matrix()
 
134
 
135
  # LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
136
  model = load_model()
137
+ embeddings = load_embeddings_and_index()
138
 
139
  def show_multi_matching_page():
140
  #st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
 
198
  )
199
 
200
  # SEARCH BOX
201
+ query = st.text_input("Search Query")
202
 
203
  with col3:
204
  # COUNTRY SELECTION
 
242
 
243
  # FILTER DF WITH SELECTED FILTER OPTIONS
244
  TOP_X_PROJECTS = 30
245
+ filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS)
 
 
 
 
246
  if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
247
  # FIND MATCHES
248
  ## If only same country checkbox i sactivated
 
312
  else:
313
  search_list = title_search_list
314
 
 
315
  project_option = st.selectbox(
316
  label = 'Search for a project',
317
  index = None,
src/embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c9bce42b5bef1adebd5b8e157c7dc3197d75c488931960dd8aa736329c024b1
3
+ size 67450241
src/extended_similarities.npz CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ae30cf9cbc19ace8dcd5c5f1e817d8e9597a32798b19c8cfd677ace7734e35e
3
- size 30499836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae835bcd73475fdfc959b9f3682f0efa33240550a9405fb4e48638ea2e3175a
3
+ size 38542579
src/extended_similarities_nonsimorga.npz CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef348fe69f2d937824178b5991a7b298e783886db369cc1e10ba8a9d084e2374
3
- size 11330845
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7737f324bf8e6998d1c761b92266d3677bac052a4db1872ecd3e3ba1d920913
3
+ size 27134286
src/projects/project_region.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09959ff959f13fafb468277ae0bdf6a1f0d068550f8ed2e36104edd2008d0b1e
3
  size 2788348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09ff7b7aefbd3cf3a0f651b8120609d755ca7b8abc0b4026170385a5b8b903f9
3
  size 2788348