annikwag committed on
Commit
0d83a6b
·
verified ·
1 Parent(s): 3f65b01

Update app.py

Browse files

Fix end-year slider; filter out very short paragraphs

Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -94,13 +94,14 @@ with col2:
94
  # Year range slider
95
  with col3:
96
  current_year = datetime.now().year
97
- default_start_year = current_year - 5 # Default to 5 years ago
98
 
 
99
  end_year_range = st.slider(
100
  "Project End Year",
101
- min_value=2010,
102
- max_value=2030,
103
- value=(default_start_year, current_year)
104
  )
105
 
106
  # Checkbox to control whether to show only exact matches
@@ -147,6 +148,14 @@ if button:
147
  semantic_all = results[0]
148
  lexical_all = results[1]
149
 
 
 
 
 
 
 
 
 
150
  # 2) Apply a threshold to SEMANTIC results (score >= 0.3)
151
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.3]
152
 
 
94
  # Year range slider
95
  with col3:
96
  current_year = datetime.now().year
97
+ default_start_year = current_year - 5
98
 
99
+ # 3) max_value is the largest end_year present in the collection
100
  end_year_range = st.slider(
101
  "Project End Year",
102
+ min_value=default_start_year,
103
+ max_value=max_end_year,
104
+ value=(default_start_year, max_end_year),
105
  )
106
 
107
  # Checkbox to control whether to show only exact matches
 
148
  semantic_all = results[0]
149
  lexical_all = results[1]
150
 
151
+ # 2) Filter out content shorter than 20 characters (interim fix: very short paragraphs otherwise receive disproportionately high similarity scores)
152
+ semantic_all = [
153
+ r for r in semantic_all if len(r.payload["page_content"]) >= 20
154
+ ]
155
+ lexical_all = [
156
+ r for r in lexical_all if len(r.payload["page_content"]) >= 20
157
+ ]
158
+
159
  # 2) Apply a threshold to SEMANTIC results (score >= 0.3)
160
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.3]
161