myshirk committed on
Commit
f7d7a98
Β·
verified Β·
1 Parent(s): 5b49841

add our app

Browse files
Files changed (1) hide show
  1. app.py +163 -137
app.py CHANGED
@@ -1,147 +1,173 @@
1
- import io
2
- import random
3
- from typing import List, Tuple
4
 
5
- import aiohttp
6
  import panel as pn
7
- from PIL import Image
8
- from transformers import CLIPModel, CLIPProcessor
9
-
10
- pn.extension(design="bootstrap", sizing_mode="stretch_width")
11
-
12
- ICON_URLS = {
13
- "brand-github": "https://github.com/holoviz/panel",
14
- "brand-twitter": "https://twitter.com/Panel_Org",
15
- "brand-linkedin": "https://www.linkedin.com/company/panel-org",
16
- "message-circle": "https://discourse.holoviz.org/",
17
- "brand-discord": "https://discord.gg/AXRHnJU6sP",
18
- }
19
-
20
-
21
async def random_url(_):
    """Fetch the URL of a random pet image from thecatapi/thedogapi.

    The argument is ignored; it exists so the function can be bound to a
    button click event.
    """
    species = random.choice(["cat", "dog"])
    endpoint = f"https://api.the{species}api.com/v1/images/search"
    async with aiohttp.ClientSession() as session:
        async with session.get(endpoint) as resp:
            payload = await resp.json()
            return payload[0]["url"]
27
-
28
-
29
@pn.cache
def load_processor_model(
    processor_name: str, model_name: str
) -> Tuple[CLIPProcessor, CLIPModel]:
    """Load and return the CLIP processor/model pair, cached by Panel so
    the weights are only downloaded once per server process."""
    return (
        CLIPProcessor.from_pretrained(processor_name),
        CLIPModel.from_pretrained(model_name),
    )
36
-
37
-
38
async def open_image_url(image_url: str) -> Image:
    """Download *image_url* and return its contents as a PIL Image."""
    async with aiohttp.ClientSession() as session:
        async with session.get(image_url) as resp:
            raw = await resp.read()
    return Image.open(io.BytesIO(raw))
42
-
43
-
44
def get_similarity_scores(class_items: List[str], image: Image) -> List[float]:
    """Score how well *image* matches each label in *class_items*.

    Runs the cached CLIP processor/model pair and returns one softmax
    probability per label (the probabilities sum to 1 across labels).
    """
    processor, model = load_processor_model(
        "openai/clip-vit-base-patch32", "openai/clip-vit-base-patch32"
    )
    inputs = processor(
        text=class_items,
        images=[image],
        return_tensors="pt",  # pytorch tensors
    )
    outputs = model(**inputs)
    # One row of image-to-text similarity logits for the single image.
    logits_per_image = outputs.logits_per_image
    # Softmax over the label axis; detach before converting to numpy.
    class_likelihoods = logits_per_image.softmax(dim=1).detach().numpy()
    return class_likelihoods[0]
57
-
58
-
59
async def process_inputs(class_names: List[str], image_url: str):
    """
    High level function that takes in the user inputs and returns the
    classification results as panel objects.

    Implemented as an async generator: each `yield` replaces the output
    pane, so the UI shows progress messages before the final results.
    """
    try:
        # Disable the whole dashboard while a request is in flight.
        main.disabled = True
        if not image_url:
            yield "##### ⚠️ Provide an image URL"
            return

        yield "##### ⚙ Fetching image and running model..."
        try:
            pil_img = await open_image_url(image_url)
            img = pn.pane.Image(pil_img, height=400, align="center")
        except Exception as e:
            # Broad catch: any download/decode failure becomes a friendly message.
            yield f"##### 😔 Something went wrong, please try a different URL!"
            return

        class_items = class_names.split(",")
        class_likelihoods = get_similarity_scores(class_items, pil_img)

        # build the results column
        results = pn.Column("##### 🎉 Here are the results!", img)

        # One label + progress bar per class, bar length = likelihood %.
        for class_item, class_likelihood in zip(class_items, class_likelihoods):
            row_label = pn.widgets.StaticText(
                name=class_item.strip(), value=f"{class_likelihood:.2%}", align="center"
            )
            row_bar = pn.indicators.Progress(
                value=int(class_likelihood * 100),
                sizing_mode="stretch_width",
                bar_color="secondary",
                margin=(0, 10),
                design=pn.theme.Material,
            )
            results.append(pn.Column(row_label, row_bar))
        yield results
    finally:
        # Re-enable inputs even if fetching or the model failed.
        main.disabled = False
99
-
100
-
101
# create widgets
randomize_url = pn.widgets.Button(name="Randomize URL", align="end")

# The URL field is re-populated with a random pet image each time the
# randomize button fires (pn.bind wires the button to random_url).
image_url = pn.widgets.TextInput(
    name="Image URL to classify",
    value=pn.bind(random_url, randomize_url),
)
class_names = pn.widgets.TextInput(
    name="Comma separated class names",
    placeholder="Enter possible class names, e.g. cat, dog",
    value="cat, dog, parrot",
)
113
 
114
# Input panel shown at the top of the dashboard.
input_widgets = pn.Column(
    "##### 😊 Click randomize or paste a URL to start classifying!",
    pn.Row(image_url, randomize_url),
    class_names,
)
119
 
120
# add interactivity: re-run process_inputs whenever either input changes.
interactive_result = pn.panel(
    pn.bind(process_inputs, image_url=image_url, class_names=class_names),
    height=600,
)

# add footer: one icon button per community link, centered by the spacers.
footer_row = pn.Row(pn.Spacer(), align="center")
for icon, url in ICON_URLS.items():
    href_button = pn.widgets.Button(icon=icon, width=35, height=35)
    # Open the link in a new browser tab on click.
    href_button.js_on_click(code=f"window.open('{url}')")
    footer_row.append(href_button)
footer_row.append(pn.Spacer())

# create dashboard
main = pn.WidgetBox(
    input_widgets,
    interactive_result,
    footer_row,
)
140
 
141
title = "Panel Demo - Image Classification"
# Serve the dashboard inside a Bootstrap template (via `panel serve`).
pn.template.BootstrapTemplate(
    title=title,
    main=main,
    main_max_width="min(50%, 698px)",
    header_background="#F08080",
).servable(title=title)
 
1
+ # app_panel.py – Panel-based CGD Survey Explorer
 
 
2
 
3
+ import os, io, json, gc
4
  import panel as pn
5
+ import pandas as pd
6
+ import boto3, torch
7
+ from sentence_transformers import SentenceTransformer, util
8
+ import psycopg2
9
+
10
+ pn.extension()
11
+
12
# ───────────────────────────────────────────────
# 1) Data / Embeddings Loaders
# ───────────────────────────────────────────────
# Postgres connection settings, all read from environment variables;
# only the port has a fallback default.
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
21
@pn.cache()
def get_data():
    """Load the survey_info table from Postgres into a DataFrame.

    Cached by Panel, so the database is only queried once per server
    process. SSL is required; credentials come from the DB_* env vars.

    Returns:
        pandas.DataFrame with columns id, country, year, section,
        question_code, question_text, answer_code, answer_text.
    """
    conn = psycopg2.connect(
        host=DB_HOST, port=DB_PORT,
        dbname=DB_NAME, user=DB_USER, password=DB_PASSWORD,
        sslmode="require"
    )
    try:
        # try/finally guarantees the connection is released even when the
        # query raises (the original leaked the connection on error).
        df_ = pd.read_sql_query("""
            SELECT id, country, year, section,
                   question_code, question_text,
                   answer_code, answer_text
            FROM survey_info;
        """, conn)
    finally:
        conn.close()
    return df_
36
+
37
df = get_data()
# Map each survey row's "id" to its DataFrame index label. Built with a
# vectorized zip over the column instead of the original per-row
# iterrows() loop — equivalent mapping, far faster on large frames.
row_lookup = dict(zip(df["id"], df.index))
39
+
40
@pn.cache()
def load_embeddings():
    """Download the precomputed embeddings checkpoint from S3.

    Returns the (ids, embeddings) pair stored in the torch checkpoint.
    Cached by Panel so S3 is hit once per server process.
    """
    bucket, key = "cgd-embeddings-bucket", "survey_info_embeddings.pt"
    with io.BytesIO() as buf:
        boto3.client("s3").download_fileobj(bucket, key, buf)
        buf.seek(0)
        checkpoint = torch.load(buf, map_location="cpu")
    gc.collect()
    return checkpoint["ids"], checkpoint["embeddings"]

ids_list, emb_tensor = load_embeddings()
51
+
52
@pn.cache()
def get_st_model():
    """Return the (cached) CPU sentence-transformer used for semantic search."""
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return SentenceTransformer(model_name, device="cpu")
55
+
56
# ───────────────────────────────────────────────
# 2) Widgets
# ───────────────────────────────────────────────
# Filter options are derived from the loaded data itself.
country_opts = sorted(df["country"].dropna().unique())
year_opts = sorted(df["year"].dropna().unique())

w_countries = pn.widgets.MultiSelect(name="Countries", options=country_opts)
w_years = pn.widgets.MultiSelect(name="Years", options=year_opts)
w_keyword = pn.widgets.TextInput(name="Keyword Search", placeholder="Search questions or answers")
w_group = pn.widgets.Checkbox(name="Group by Question Text", value=False)

# Semantic search
w_semquery = pn.widgets.TextInput(name="Semantic Query")
w_search_button = pn.widgets.Button(name="Search", button_type="primary", disabled=False)
70
+
71
+ # ───────────────────────────────────────────────
72
+ # 3) Filtering Logic
73
+ # ───────────────────────────────────────────────
74
@pn.depends(w_countries, w_years, w_keyword, w_group)
def keyword_filter(countries, years, keyword, group):
    """Return a DataFrame pane of survey rows matching the sidebar filters.

    Args:
        countries: list of selected country names (empty = all).
        years: list of selected years (empty = all).
        keyword: substring matched case-insensitively against question
            text, answer text, and question code.
        group: when True, collapse rows by question_text and show which
            countries/years asked it plus up to three sample answers.

    NOTE(review): the ungrouped view intentionally displays only
    country/year/question/answer columns; the row "id" is not shown.
    """
    # Boolean indexing below already produces new frames, so the
    # original full-table df.copy() on every widget event was wasted work.
    filt = df
    if countries:
        filt = filt[filt["country"].isin(countries)]
    if years:
        filt = filt[filt["year"].isin(years)]
    if keyword:
        filt = filt[
            filt["question_text"].str.contains(keyword, case=False, na=False) |
            filt["answer_text"].str.contains(keyword, case=False, na=False) |
            filt["question_code"].astype(str).str.contains(keyword, case=False, na=False)
        ]

    if group:
        # Collapse duplicates of the same question across surveys.
        grouped = (
            filt.groupby("question_text")
            .agg({
                "country": lambda x: sorted(set(x)),
                "year": lambda x: sorted(set(x)),
                "answer_text": lambda x: list(x)[:3]  # up to 3 sample answers
            })
            .reset_index()
            .rename(columns={
                "country": "Countries",
                "year": "Years",
                "answer_text": "Sample Answers"
            })
        )
        return pn.pane.DataFrame(grouped, sizing_mode="stretch_width", height=400)

    return pn.pane.DataFrame(
        filt[["country", "year", "question_text", "answer_text"]],
        sizing_mode="stretch_width", height=400
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ # ───────────────────────────────────────────────
111
+ # 4) Semantic Search Callback
112
+ # ───────────────────────────────────────────────
113
def semantic_search(event=None):
    """Button callback: rank rows by embedding similarity to the query.

    Encodes the query, takes the top-k most similar precomputed
    embeddings, and shows those rows (with a Score column) above the
    remaining keyword-filtered rows in result_pane.

    Fixes vs. the original:
    - The original read ``keyword_filter(...).object["id"]``, but that
      pane's DataFrame only contains country/year/question_text/
      answer_text — no "id" column — so every search raised KeyError.
      We recompute the same filter directly on ``df``, which has "id".
    - ``torch.topk(..., k=50)`` raised when the corpus had fewer than
      50 rows; k is now clamped to the corpus size.
    """
    query = w_semquery.value.strip()
    if not query:
        return

    model = get_st_model()
    q_vec = model.encode(query, convert_to_tensor=True, device="cpu").cpu()
    sims = util.cos_sim(q_vec, emb_tensor)[0]
    top_vals, top_idx = torch.topk(sims, k=min(50, sims.shape[0]))

    sem_ids = [ids_list[i] for i in top_idx.tolist()]
    sem_rows = df.loc[df["id"].isin(sem_ids)].copy()
    score_map = dict(zip(sem_ids, top_vals.tolist()))
    sem_rows["Score"] = sem_rows["id"].map(score_map)
    sem_rows = sem_rows.sort_values("Score", ascending=False)

    # Apply the sidebar keyword/country/year filters to the raw frame
    # (same logic as keyword_filter, ungrouped) so "id" is available.
    filt = df
    if w_countries.value:
        filt = filt[filt["country"].isin(w_countries.value)]
    if w_years.value:
        filt = filt[filt["year"].isin(w_years.value)]
    keyword = w_keyword.value
    if keyword:
        filt = filt[
            filt["question_text"].str.contains(keyword, case=False, na=False) |
            filt["answer_text"].str.contains(keyword, case=False, na=False) |
            filt["question_code"].astype(str).str.contains(keyword, case=False, na=False)
        ]

    # Non-semantic matches go below the scored rows, with a blank score.
    remainder = filt.loc[~filt["id"].isin(sem_ids)].copy()
    remainder["Score"] = ""

    combined = pd.concat([sem_rows, remainder], ignore_index=True)

    result_pane.object = combined[["Score", "country", "year", "question_text", "answer_text"]]
143
+
144
# Run a semantic search whenever the button is clicked. result_pane is
# defined just below; that is safe because the callback only fires after
# the module has finished importing.
w_search_button.on_click(semantic_search)

# Destination pane that semantic_search fills with the ranked results.
result_pane = pn.pane.DataFrame(height=500, sizing_mode="stretch_width")
147
+
148
# ───────────────────────────────────────────────
# 5) Layout
# ───────────────────────────────────────────────
# Left-hand column: keyword filters on top, semantic search below.
sidebar = pn.Column(
    "## 🔍 Filter Questions",
    w_countries, w_years, w_keyword, w_group,
    pn.Spacer(height=20),
    "## 🧠 Semantic Search",
    w_semquery, w_search_button,
    width=300
)
159
 
160
# Main area: the live keyword-filtered table (keyword_filter is a bound
# function, so Panel re-renders it on widget changes) plus the
# button-driven semantic results pane, in two tabs.
main = pn.Column(
    pn.pane.Markdown("## 🌍 CGD Survey Explorer"),
    pn.Tabs(
        ("Filtered Results", keyword_filter),
        ("Semantic Search Results", result_pane),
    )
)
167
 
168
# Wrap everything in a FastListTemplate and expose it via `panel serve`.
pn.template.FastListTemplate(
    title="CGD Survey Explorer",
    sidebar=sidebar,
    main=main,
    theme_toggle=True,
).servable()