Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
fix end year slider, remove super short paragraphs
app.py
CHANGED
@@ -94,13 +94,14 @@ with col2:
|
|
94 |
# Year range slider
|
95 |
with col3:
|
96 |
current_year = datetime.now().year
|
97 |
-
default_start_year = current_year - 5
|
98 |
|
|
|
99 |
end_year_range = st.slider(
|
100 |
"Project End Year",
|
101 |
-
min_value=
|
102 |
-
max_value=
|
103 |
-
value=(default_start_year,
|
104 |
)
|
105 |
|
106 |
# Checkbox to control whether to show only exact matches
|
@@ -147,6 +148,14 @@ if button:
|
|
147 |
semantic_all = results[0]
|
148 |
lexical_all = results[1]
|
149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
# 2) Apply a threshold to SEMANTIC results (score >= 0.3)
|
151 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.3]
|
152 |
|
|
|
94 |
# Year range slider
|
95 |
with col3:
|
96 |
current_year = datetime.now().year
|
97 |
+
default_start_year = current_year - 5
|
98 |
|
99 |
+
# 3) The max_value is now the actual max end_year from your collection
|
100 |
end_year_range = st.slider(
|
101 |
"Project End Year",
|
102 |
+
min_value=default_start_year,
|
103 |
+
max_value=max_end_year,
|
104 |
+
value=(default_start_year, max_end_year),
|
105 |
)
|
106 |
|
107 |
# Checkbox to control whether to show only exact matches
|
|
|
148 |
semantic_all = results[0]
|
149 |
lexical_all = results[1]
|
150 |
|
151 |
+
# 2) Filter out results whose content is shorter than 20 characters (interim fix: very short paragraphs with only a few characters otherwise receive high similarity scores)
|
152 |
+
semantic_all = [
|
153 |
+
r for r in semantic_all if len(r.payload["page_content"]) >= 20
|
154 |
+
]
|
155 |
+
lexical_all = [
|
156 |
+
r for r in lexical_all if len(r.payload["page_content"]) >= 20
|
157 |
+
]
|
158 |
+
|
159 |
# 2) Apply a threshold to SEMANTIC results (score >= 0.3)
|
160 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.3]
|
161 |
|