myshirk committed on
Commit
03e0863
·
verified ·
1 Parent(s): b183d7b

add filter clearing

Browse files
Files changed (1) hide show
  1. app.py +25 -64
app.py CHANGED
@@ -69,6 +69,7 @@ w_group = pn.widgets.Checkbox(name="Group by Question Text", value=False)
69
 
70
  w_semquery = pn.widgets.TextInput(name="Semantic Query")
71
  w_search_button = pn.widgets.Button(name="Semantic Search", button_type="primary")
 
72
 
73
  # ──────────────────────────────────────────────────────────────────────
74
  # 3) Unified Results Table (Tabulator)
@@ -79,47 +80,20 @@ result_table = pn.widgets.Tabulator(
79
  sizing_mode="stretch_width",
80
  layout='fit_columns',
81
  show_index=False,
 
82
  )
83
 
84
- @pn.depends(w_countries, w_years, w_keyword, w_group, watch=True)
85
- def update_table(countries, years, keyword, group):
86
- filt = df.copy()
87
- if countries:
88
- filt = filt[filt["country"].isin(countries)]
89
- if years:
90
- filt = filt[filt["year"].isin(years)]
91
- if keyword:
92
- filt = filt[
93
- filt["question_text"].str.contains(keyword, case=False, na=False) |
94
- filt["answer_text"].str.contains(keyword, case=False, na=False) |
95
- filt["question_code"].astype(str).str.contains(keyword, case=False, na=False)
96
- ]
97
-
98
- if group:
99
- grouped = (
100
- filt.groupby("question_text")
101
- .agg({
102
- "country": lambda x: sorted(set(x)),
103
- "year": lambda x: sorted(set(x)),
104
- "answer_text": lambda x: list(x)[:3]
105
- })
106
- .reset_index()
107
- .rename(columns={
108
- "country": "Countries",
109
- "year": "Years",
110
- "answer_text": "Sample Answers"
111
- })
112
- )
113
- result_table.value = grouped
114
- else:
115
- result_table.value = filt[["country", "year", "question_text", "answer_text"]]
116
-
117
  def semantic_search(event=None):
118
  query = w_semquery.value.strip()
119
  if not query:
120
  return
121
 
122
- # Step 1: Filter the full dataframe
 
 
123
  filt = df.copy()
124
  if w_countries.value:
125
  filt = filt[filt["country"].isin(w_countries.value)]
@@ -132,54 +106,39 @@ def semantic_search(event=None):
132
  filt["question_code"].astype(str).str.contains(w_keyword.value, case=False, na=False)
133
  ]
134
 
135
- # Step 2: Load only embeddings for the filtered rows
136
- model, ids_list, emb_tensor = get_semantic_resources()
137
-
138
- # Create a mask for filtered IDs
139
  filtered_ids = filt["id"].tolist()
140
  id_to_index = {id_: i for i, id_ in enumerate(ids_list)}
141
  filtered_indices = [id_to_index[id_] for id_ in filtered_ids if id_ in id_to_index]
142
-
143
- # Subset the embedding tensor
 
 
144
  filtered_embs = emb_tensor[filtered_indices]
145
-
146
- # Step 3: Semantic search only within filtered subset
147
  q_vec = model.encode(query, convert_to_tensor=True, device="cpu").cpu()
148
  sims = util.cos_sim(q_vec, filtered_embs)[0]
149
  top_vals, top_idx = torch.topk(sims, k=50)
150
-
151
  top_filtered_ids = [filtered_ids[i] for i in top_idx.tolist()]
152
  sem_rows = filt[filt["id"].isin(top_filtered_ids)].copy()
153
  score_map = dict(zip(top_filtered_ids, top_vals.tolist()))
154
  sem_rows["Score"] = sem_rows["id"].map(score_map)
155
  sem_rows = sem_rows.sort_values("Score", ascending=False)
156
-
157
- # Final output
158
- result_table.value = sem_rows[["Score", "country", "year", "question_text", "answer_text"]]
159
 
 
160
 
161
- filt = df.copy()
162
- if w_countries.value:
163
- filt = filt[filt["country"].isin(w_countries.value)]
164
- if w_years.value:
165
- filt = filt[filt["year"].isin(w_years.value)]
166
- if w_keyword.value:
167
- filt = filt[
168
- filt["question_text"].str.contains(w_keyword.value, case=False, na=False) |
169
- filt["answer_text"].str.contains(w_keyword.value, case=False, na=False) |
170
- filt["question_code"].astype(str).str.contains(w_keyword.value, case=False, na=False)
171
- ]
172
-
173
- remainder = filt.loc[~filt["id"].isin(sem_ids)].copy()
174
- remainder["Score"] = ""
175
-
176
- combined = pd.concat([sem_rows, remainder], ignore_index=True)
177
- result_table.value = combined[["Score", "country", "year", "question_text", "answer_text"]]
178
 
179
  w_search_button.on_click(semantic_search)
 
180
 
181
  # ──────────────────────────────────────────────────────────────────────
182
- # 4) Layout
183
  # ──────────────────────────────────────────────────────────────────────
184
  sidebar = pn.Column(
185
  "## 🔎 Filters",
@@ -187,6 +146,8 @@ sidebar = pn.Column(
187
  pn.Spacer(height=20),
188
  "## 🧠 Semantic Search",
189
  w_semquery, w_search_button,
 
 
190
  width=300
191
  )
192
 
 
69
 
70
  w_semquery = pn.widgets.TextInput(name="Semantic Query")
71
  w_search_button = pn.widgets.Button(name="Semantic Search", button_type="primary")
72
+ w_clear_filters = pn.widgets.Button(name="Clear Filters", button_type="warning")
73
 
74
  # ──────────────────────────────────────────────────────────────────────
75
  # 3) Unified Results Table (Tabulator)
 
80
  sizing_mode="stretch_width",
81
  layout='fit_columns',
82
  show_index=False,
83
+ show_filter=True
84
  )
85
 
86
+ # ──────────────────────────────────────────────────────────────────────
87
+ # 4) Semantic Search with Filtering
88
+ # ──────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def semantic_search(event=None):
90
  query = w_semquery.value.strip()
91
  if not query:
92
  return
93
 
94
+ model, ids_list, emb_tensor = get_semantic_resources()
95
+
96
+ # Apply filters first
97
  filt = df.copy()
98
  if w_countries.value:
99
  filt = filt[filt["country"].isin(w_countries.value)]
 
106
  filt["question_code"].astype(str).str.contains(w_keyword.value, case=False, na=False)
107
  ]
108
 
 
 
 
 
109
  filtered_ids = filt["id"].tolist()
110
  id_to_index = {id_: i for i, id_ in enumerate(ids_list)}
111
  filtered_indices = [id_to_index[id_] for id_ in filtered_ids if id_ in id_to_index]
112
+ if not filtered_indices:
113
+ result_table.value = pd.DataFrame(columns=["Score", "country", "year", "question_text", "answer_text"])
114
+ return
115
+
116
  filtered_embs = emb_tensor[filtered_indices]
117
+
 
118
  q_vec = model.encode(query, convert_to_tensor=True, device="cpu").cpu()
119
  sims = util.cos_sim(q_vec, filtered_embs)[0]
120
  top_vals, top_idx = torch.topk(sims, k=50)
121
+
122
  top_filtered_ids = [filtered_ids[i] for i in top_idx.tolist()]
123
  sem_rows = filt[filt["id"].isin(top_filtered_ids)].copy()
124
  score_map = dict(zip(top_filtered_ids, top_vals.tolist()))
125
  sem_rows["Score"] = sem_rows["id"].map(score_map)
126
  sem_rows = sem_rows.sort_values("Score", ascending=False)
 
 
 
127
 
128
+ result_table.value = sem_rows[["Score", "country", "year", "question_text", "answer_text"]]
129
 
130
+ def clear_filters(event=None):
131
+ w_countries.value = []
132
+ w_years.value = []
133
+ w_keyword.value = ""
134
+ w_semquery.value = ""
135
+ result_table.value = df[["country", "year", "question_text", "answer_text"]].copy()
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  w_search_button.on_click(semantic_search)
138
+ w_clear_filters.on_click(clear_filters)
139
 
140
  # ──────────────────────────────────────────────────────────────────────
141
+ # 5) Layout
142
  # ──────────────────────────────────────────────────────────────────────
143
  sidebar = pn.Column(
144
  "## 🔎 Filters",
 
146
  pn.Spacer(height=20),
147
  "## 🧠 Semantic Search",
148
  w_semquery, w_search_button,
149
+ pn.Spacer(height=20),
150
+ w_clear_filters,
151
  width=300
152
  )
153