siddhartharya committed on
Commit ad8e10f · verified · 1 Parent(s): f42e018

Update app.py

Files changed (1):
  1. app.py +525 -393

app.py CHANGED
@@ -12,6 +12,7 @@ import base64
  import logging
  import os
  import sys

  # Import OpenAI library
  import openai
@@ -74,37 +75,121 @@ if not GROQ_API_KEY:
  openai.api_key = GROQ_API_KEY
  openai.api_base = "https://api.groq.com/openai/v1"

- def extract_main_content(soup):
      """
-     Extract the main content from a webpage while filtering out boilerplate content.
      """
      if not soup:
          return ""

-     # Remove script and style elements
-     for element in soup(['script', 'style', 'header', 'footer', 'nav', 'ads', 'sidebar']):
-         element.decompose()

-     # First try to find content in main content areas
-     main_content_tags = soup.find_all(['article', 'main', 'div.content', 'div.post', 'div.entry-content'])
-     if main_content_tags:
-         content = ' '.join([tag.get_text(strip=True, separator=' ') for tag in main_content_tags])
-     else:
-         # Try to find content in <p> tags
-         p_tags = soup.find_all('p')
-         if p_tags:
-             content = ' '.join([p.get_text(strip=True, separator=' ') for p in p_tags])
          else:
              # Fallback to body content
-             content = soup.body.get_text(strip=True, separator=' ') if soup.body else soup.get_text(strip=True, separator=' ')

-     # Clean up the text
-     content = ' '.join(content.split())
-     content = re.sub(r'\s+', ' ', content)  # Remove multiple spaces
-     content = re.sub(r'[\n\r\t]', ' ', content)  # Remove newlines and tabs

-     # Limit content length to avoid token limits (adjust as needed)
-     return content[:5000]

  def get_page_metadata(soup):
      """
@@ -119,170 +204,213 @@ def get_page_metadata(soup):
      if not soup:
          return metadata

-     # Get title
      title_tag = soup.find('title')
      if title_tag and title_tag.string:
          metadata['title'] = title_tag.string.strip()

-     # Get meta description (try multiple variants)
-     meta_desc = (
-         soup.find('meta', attrs={'name': 'description'}) or
-         soup.find('meta', attrs={'property': 'og:description'}) or
-         soup.find('meta', attrs={'name': 'twitter:description'})
-     )
-     if meta_desc:
-         metadata['description'] = meta_desc.get('content', '').strip()

-     # Get meta keywords
-     meta_keywords = soup.find('meta', attrs={'name': 'keywords'})
-     if meta_keywords:
-         metadata['keywords'] = meta_keywords.get('content', '').strip()

-     # Get OG title if main title is empty
-     if not metadata['title']:
-         og_title = soup.find('meta', attrs={'property': 'og:title'})
-         if og_title:
-             metadata['title'] = og_title.get('content', '').strip()

      return metadata

  def generate_summary(bookmark):
      """
      Generate a comprehensive summary for a bookmark using available content and LLM.
      """
-     logger.info(f"Generating summary for bookmark: {bookmark.get('url')}")

      try:
-         # Get the HTML soup object from the bookmark if it exists
          soup = BeautifulSoup(bookmark.get('html_content', ''), 'html.parser')

-         # Step 1: Extract all available information
          metadata = get_page_metadata(soup)
-         main_content = extract_main_content(soup)

-         # Step 2: Generate summary using LLM with all available content
          try:
-             # Prepare comprehensive context for LLM
-             available_content = []
-             if metadata['title']:
-                 available_content.append(f"Title: {metadata['title']}")
-             if metadata['description']:
-                 available_content.append(f"Description: {metadata['description']}")
-             if metadata['keywords']:
-                 available_content.append(f"Keywords: {metadata['keywords']}")
-             if main_content:
-                 available_content.append(f"Main Content: {main_content}")

-             if not available_content:
-                 logger.warning("No content available for summary generation")
-                 bookmark['summary'] = bookmark.get('title', 'No summary available.')
                  return bookmark

-             prompt = f"""
-             Analyze and summarize this webpage based on the following information:
-
-             {' | '.join(available_content)}
-
-             Please provide a concise summary (2-3 sentences) focusing on:
-             1. The main purpose or topic of the page
-             2. Key information or features
-             3. Target audience or use case (if apparent)
-
-             Be factual and objective.
-             """

-             response = openai.ChatCompletion.create(
-                 model='llama3-8b-8192',
-                 messages=[
-                     {"role": "system", "content": "You are a helpful assistant that creates concise webpage summaries."},
-                     {"role": "user", "content": prompt}
-                 ],
-                 max_tokens=150,
-                 temperature=0.5,
-             )

-             summary = response['choices'][0]['message']['content'].strip()
-             logger.info("Successfully generated LLM summary")
-             bookmark['summary'] = summary
-             return bookmark

          except Exception as e:
-             logger.error(f"Error generating LLM summary: {e}")
-             # Fallback mechanisms in order of preference
-             if metadata['description']:
-                 logger.info("Falling back to meta description")
-                 bookmark['summary'] = metadata['description']
-             elif main_content:
-                 logger.info("Falling back to truncated main content")
-                 bookmark['summary'] = ' '.join(main_content.split()[:50]) + '...'
-             elif metadata['title']:
-                 logger.info("Falling back to title")
-                 bookmark['summary'] = metadata['title']
-             else:
-                 bookmark['summary'] = bookmark.get('title', 'No summary available.')
-             return bookmark

      except Exception as e:
          logger.error(f"Error in generate_summary: {e}")
          bookmark['summary'] = bookmark.get('title', 'No summary available.')
-         return bookmark
-
- def parse_bookmarks(file_content):
-     """
-     Parse bookmarks from HTML file.
-     """
-     logger.info("Parsing bookmarks")
-     try:
-         soup = BeautifulSoup(file_content, 'html.parser')
-         extracted_bookmarks = []
-         for link in soup.find_all('a'):
-             url = link.get('href')
-             title = link.text.strip()
-             if url and title:
-                 extracted_bookmarks.append({'url': url, 'title': title})
-         logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
-         return extracted_bookmarks
-     except Exception as e:
-         logger.error("Error parsing bookmarks: %s", e)
-         raise

  async def fetch_url_info(session, bookmark):
      """
-     Fetch information about a URL asynchronously.
      """
      url = bookmark['url']
      if url in fetch_cache:
          bookmark.update(fetch_cache[url])
          return bookmark

      try:
          logger.info(f"Fetching URL info for: {url}")
-         headers = {
-             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-         }
-         async with session.get(url, timeout=10, headers=headers) as response:
              bookmark['etag'] = response.headers.get('ETag', 'N/A')
-             bookmark['status_code'] = response.status
-
-             if response.status >= 400:
-                 bookmark['dead_link'] = True
-                 bookmark['description'] = ''
                  bookmark['html_content'] = ''
-                 logger.warning(f"Dead link detected: {url} with status {response.status}")
              else:
-                 bookmark['dead_link'] = False
-                 content = await response.text()
-                 bookmark['html_content'] = content  # Store full HTML for summary generation
-                 bookmark['description'] = ''  # Will be set by generate_summary function
-                 logger.info(f"Fetched information for {url}")
      except Exception as e:
-         bookmark['dead_link'] = True
-         bookmark['etag'] = 'N/A'
-         bookmark['status_code'] = 'N/A'
-         bookmark['description'] = ''
-         bookmark['html_content'] = ''
-         logger.error(f"Error fetching URL info for {url}: {e}")
      finally:
          fetch_cache[url] = {
              'etag': bookmark.get('etag'),
              'status_code': bookmark.get('status_code'),
@@ -290,76 +418,80 @@ async def fetch_url_info(session, bookmark):
              'description': bookmark.get('description'),
              'html_content': bookmark.get('html_content', '')
          }
      return bookmark

  async def process_bookmarks_async(bookmarks_list):
      """
-     Process all bookmarks asynchronously.
      """
      logger.info("Processing bookmarks asynchronously")
      try:
-         connector = aiohttp.TCPConnector(limit=5)  # Limit concurrent connections
-         timeout = aiohttp.ClientTimeout(total=30)  # Set timeout
-         async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
              tasks = []
              for bookmark in bookmarks_list:
                  task = asyncio.ensure_future(fetch_url_info(session, bookmark))
                  tasks.append(task)
-             await asyncio.gather(*tasks)
          logger.info("Completed processing bookmarks asynchronously")
      except Exception as e:
          logger.error(f"Error in asynchronous processing of bookmarks: {e}")
          raise

- def assign_category(bookmark):
      """
-     Assign a category to a bookmark based on its content.
      """
-     if bookmark.get('dead_link'):
-         bookmark['category'] = 'Dead Link'
-         logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
-         return bookmark
-
-     summary = bookmark.get('summary', '').lower()
-     assigned_category = 'Uncategorized'
-
-     # Keywords associated with each category
-     category_keywords = {
-         "Social Media": ["social media", "networking", "friends", "connect", "posts", "profile"],
-         "News and Media": ["news", "journalism", "media", "headlines", "breaking news"],
-         "Education and Learning": ["education", "learning", "courses", "tutorial", "university", "academy", "study"],
-         "Entertainment": ["entertainment", "movies", "tv shows", "games", "comics", "fun"],
-         "Shopping and E-commerce": ["shopping", "e-commerce", "buy", "sell", "marketplace", "deals", "store"],
-         "Finance and Banking": ["finance", "banking", "investment", "money", "economy", "stock", "trading"],
-         "Technology": ["technology", "tech", "gadgets", "software", "computers", "innovation"],
-         "Health and Fitness": ["health", "fitness", "medical", "wellness", "exercise", "diet"],
-         "Travel and Tourism": ["travel", "tourism", "destinations", "hotels", "flights", "vacation"],
-         "Food and Recipes": ["food", "recipes", "cooking", "cuisine", "restaurant", "dining"],
-         "Sports": ["sports", "scores", "teams", "athletics", "matches", "leagues"],
-         "Arts and Culture": ["arts", "culture", "museum", "gallery", "exhibition", "artistic"],
-         "Government and Politics": ["government", "politics", "policy", "election", "public service"],
-         "Business and Economy": ["business", "corporate", "industry", "economy", "markets"],
-         "Science and Research": ["science", "research", "experiment", "laboratory", "study", "scientific"],
-         "Personal Blogs and Journals": ["blog", "journal", "personal", "diary", "thoughts", "opinions"],
-         "Job Search and Careers": ["jobs", "careers", "recruitment", "resume", "employment", "hiring"],
-         "Music and Audio": ["music", "audio", "songs", "albums", "artists", "bands"],
-         "Videos and Movies": ["video", "movies", "film", "clips", "trailers", "cinema"],
-         "Reference and Knowledge Bases": ["reference", "encyclopedia", "dictionary", "wiki", "knowledge", "information"],
-     }
-
-     for category, keywords in category_keywords.items():
-         for keyword in keywords:
-             if re.search(r'\b' + re.escape(keyword) + r'\b', summary):
-                 assigned_category = category
-                 logger.info(f"Assigned category '{assigned_category}' to bookmark: {bookmark.get('url')}")
-                 break
-         if assigned_category != 'Uncategorized':
-             break
-
-     bookmark['category'] = assigned_category
-     if assigned_category == 'Uncategorized':
-         logger.info(f"No matching category found for bookmark: {bookmark.get('url')}")
-     return bookmark
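The matcher deleted above hinges on `\b` word boundaries, so keywords only fire on whole words; a quick illustration of that behavior:

    import re

    # '\btech\b' matches 'tech' only as a standalone word:
    bool(re.search(r'\btech\b', "a technical overview"))  # False
    bool(re.search(r'\btech\b', "the tech industry"))     # True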
 
  def vectorize_and_index(bookmarks_list):
      """
@@ -367,11 +499,25 @@ def vectorize_and_index(bookmarks_list):
      """
      logger.info("Vectorizing summaries and building FAISS index")
      try:
-         summaries = [bookmark['summary'] for bookmark in bookmarks_list]
          embeddings = embedding_model.encode(summaries)
          dimension = embeddings.shape[1]
          faiss_idx = faiss.IndexFlatL2(dimension)
          faiss_idx.add(np.array(embeddings))
          logger.info("FAISS index built successfully")
          return faiss_idx, embeddings
      except Exception as e:
@@ -380,7 +526,7 @@ def vectorize_and_index(bookmarks_list):

  def display_bookmarks():
      """
-     Generate HTML display for bookmarks.
      """
      logger.info("Generating HTML display for bookmarks")
      cards = ''
@@ -392,18 +538,23 @@ def display_bookmarks():
          etag = bookmark.get('etag', 'N/A')
          summary = bookmark.get('summary', '')
          category = bookmark.get('category', 'Uncategorized')

          if bookmark.get('dead_link'):
-             card_style = "border: 2px solid var(--error-color);"
-             text_style = "color: var(--error-color);"
          else:
-             card_style = "border: 2px solid var(--success-color);"
              text_style = "color: var(--text-color);"

          card_html = f'''
-         <div class="card" style="{card_style}; padding: 10px; margin: 10px; border-radius: 5px;">
              <div class="card-content">
-                 <h3 style="{text_style}">{index}. {title} {status}</h3>
                  <p style="{text_style}"><strong>Category:</strong> {category}</p>
                  <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
                  <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
@@ -412,181 +563,264 @@ def display_bookmarks():
              </div>
          '''
          cards += card_html
      logger.info("HTML display generated")
-     return cards

  def process_uploaded_file(file):
      """
-     Process the uploaded bookmarks file.
      """
      global bookmarks, faiss_index
      logger.info("Processing uploaded file")

      if file is None:
-         logger.warning("No file uploaded")
-         return "Please upload a bookmarks HTML file.", '', gr.update(choices=[]), display_bookmarks()

      try:
          file_content = file.decode('utf-8')
      except UnicodeDecodeError as e:
-         logger.error(f"Error decoding the file: {e}")
-         return "Error decoding the file. Please ensure it's a valid HTML file.", '', gr.update(choices=[]), display_bookmarks()

      try:
          bookmarks = parse_bookmarks(file_content)
      except Exception as e:
          logger.error(f"Error parsing bookmarks: {e}")
-         return "Error parsing the bookmarks HTML file.", '', gr.update(choices=[]), display_bookmarks()

      if not bookmarks:
-         logger.warning("No bookmarks found in the uploaded file")
-         return "No bookmarks found in the uploaded file.", '', gr.update(choices=[]), display_bookmarks()

-     # Asynchronously fetch bookmark info
      try:
          asyncio.run(process_bookmarks_async(bookmarks))
-     except Exception as e:
-         logger.error(f"Error processing bookmarks asynchronously: {e}")
-         return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
-
-     # Generate summaries and assign categories
-     for bookmark in bookmarks:
-         generate_summary(bookmark)
-         assign_category(bookmark)

-     try:
          faiss_index, embeddings = vectorize_and_index(bookmarks)
-     except Exception as e:
-         logger.error(f"Error building FAISS index: {e}")
-         return "Error building search index.", '', gr.update(choices=[]), display_bookmarks()
-
-     message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
-     logger.info(message)
-
-     # Generate displays and updates
-     bookmark_html = display_bookmarks()
-     choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-                for i, bookmark in enumerate(bookmarks)]

-     return message, bookmark_html, gr.update(choices=choices), bookmark_html

  def delete_selected_bookmarks(selected_indices):
      """
-     Delete selected bookmarks.
      """
      global bookmarks, faiss_index
      if not selected_indices:
          return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()

-     indices = [int(s.split('.')[0])-1 for s in selected_indices]
-     indices = sorted(indices, reverse=True)
-     for idx in indices:
-         if 0 <= idx < len(bookmarks):
-             logger.info(f"Deleting bookmark at index {idx + 1}")
-             bookmarks.pop(idx)
-
-     if bookmarks:
-         faiss_index, embeddings = vectorize_and_index(bookmarks)
-     else:
-         faiss_index = None

-     message = "🗑️ Selected bookmarks deleted successfully."
-     logger.info(message)
-
-     # Update choices and display
-     choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-                for i, bookmark in enumerate(bookmarks)]
-
-     return message, gr.update(choices=choices), display_bookmarks()

  def edit_selected_bookmarks_category(selected_indices, new_category):
      """
-     Edit category of selected bookmarks.
      """
      if not selected_indices:
          return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
      if not new_category:
          return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks()

-     indices = [int(s.split('.')[0])-1 for s in selected_indices]
-     for idx in indices:
-         if 0 <= idx < len(bookmarks):
-             bookmarks[idx]['category'] = new_category
-             logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
-
-     message = "✏️ Category updated for selected bookmarks."
-     logger.info(message)
-
-     # Update choices and display
-     choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-                for i, bookmark in enumerate(bookmarks)]
-
-     return message, gr.update(choices=choices), display_bookmarks()

  def export_bookmarks():
      """
-     Export bookmarks to HTML file.
      """
      if not bookmarks:
-         logger.warning("No bookmarks to export")
          return "⚠️ No bookmarks to export."

      try:
-         logger.info("Exporting bookmarks to HTML")
-         soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1><Title>Bookmarks</Title><H1>Bookmarks</H1>", 'html.parser')
          dl = soup.new_tag('DL')
          for bookmark in bookmarks:
              dt = soup.new_tag('DT')
              a = soup.new_tag('A', href=bookmark['url'])
              a.string = bookmark['title']
              dt.append(a)
-             dl.append(dt)
-         soup.append(dl)
          html_content = str(soup)
          b64 = base64.b64encode(html_content.encode()).decode()
          href = f'data:text/html;base64,{b64}'
          logger.info("Bookmarks exported successfully")
-         return f'<a href="{href}" download="bookmarks.html">💾 Download Exported Bookmarks</a>'
      except Exception as e:
          logger.error(f"Error exporting bookmarks: {e}")
          return "⚠️ Error exporting bookmarks."

  def chatbot_response(user_query):
      """
-     Generate chatbot response using Groq Cloud API.
      """
      if not GROQ_API_KEY:
-         logger.warning("GROQ_API_KEY not set.")
          return "⚠️ API key not set. Please set the GROQ_API_KEY environment variable."

      if not bookmarks:
-         logger.warning("No bookmarks available for chatbot")
          return "⚠️ No bookmarks available. Please upload and process your bookmarks first."

-     logger.info(f"Chatbot received query: {user_query}")

      try:
-         max_bookmarks = 50
-         bookmark_data = ""
-         for idx, bookmark in enumerate(bookmarks[:max_bookmarks]):
-             bookmark_data += f"{idx+1}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"

          prompt = f"""
-         You are an assistant that helps users find relevant bookmarks from their collection based on their queries.

-         User Query:
-         {user_query}

-         Bookmarks:
-         {bookmark_data}

-         Please identify the most relevant bookmarks that match the user's query. Provide a concise list including the index, title, URL, and a brief summary.
          """

          response = openai.ChatCompletion.create(
              model='llama3-8b-8192',
              messages=[
-                 {"role": "system", "content": "You help users find relevant bookmarks based on their queries."},
                  {"role": "user", "content": prompt}
              ],
              max_tokens=500,
@@ -594,7 +828,7 @@ def chatbot_response(user_query):
          )

          answer = response['choices'][0]['message']['content'].strip()
-         logger.info("Chatbot response generated using Groq Cloud API")
          return answer

      except Exception as e:
@@ -604,114 +838,12 @@ def chatbot_response(user_query):

  def build_app():
      """
-     Build and launch the Gradio app.
      """
      try:
          logger.info("Building Gradio app")
          with gr.Blocks(css="app.css") as demo:
-             # General Overview
-             gr.Markdown("""
-             # 📚 SmartMarks - AI Browser Bookmarks Manager
-
-             Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
-
-             ---
-
-             ## 🚀 **How to Use SmartMarks**
-
-             SmartMarks is divided into three main sections:
-
-             1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
-             2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
-             3. **🛠️ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
-             """)
-
-             # Upload and Process Bookmarks Tab
-             with gr.Tab("Upload and Process Bookmarks"):
-                 gr.Markdown("""
-                 ## 📂 **Upload and Process Bookmarks**
-
-                 ### 📝 **Steps:**
-                 1. Click on the "Upload Bookmarks HTML File" button
-                 2. Select your bookmarks file
-                 3. Click "Process Bookmarks" to analyze and organize your bookmarks
-                 """)
-
-                 upload = gr.File(label="📁 Upload Bookmarks HTML File", type='binary')
-                 process_button = gr.Button("⚙️ Process Bookmarks")
-                 output_text = gr.Textbox(label="✅ Output", interactive=False)
-                 bookmark_display = gr.HTML(label="📄 Processed Bookmarks")
-
-             # Chat with Bookmarks Tab
-             with gr.Tab("Chat with Bookmarks"):
-                 gr.Markdown("""
-                 ## 💬 **Chat with Bookmarks**
-
-                 Ask questions about your bookmarks and get relevant results.
-                 """)
-
-                 user_input = gr.Textbox(
-                     label="✍️ Ask about your bookmarks",
-                     placeholder="e.g., Do I have any bookmarks about AI?"
-                 )
-                 chat_button = gr.Button("📨 Send")
-                 chat_output = gr.Textbox(label="💬 Response", interactive=False)
-
-             # Manage Bookmarks Tab
-             with gr.Tab("Manage Bookmarks"):
-                 gr.Markdown("""
-                 ## 🛠️ **Manage Bookmarks**
-                 Select bookmarks to delete or edit their categories.
-                 """)
-
-                 manage_output = gr.Textbox(label="🔄 Status", interactive=False)
-                 bookmark_selector = gr.CheckboxGroup(
-                     label="✅ Select Bookmarks",
-                     choices=[]
-                 )
-                 new_category = gr.Dropdown(
-                     label="🆕 New Category",
-                     choices=CATEGORIES,
-                     value="Uncategorized"
-                 )
-                 bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
-
-                 with gr.Row():
-                     delete_button = gr.Button("🗑️ Delete Selected")
-                     edit_category_button = gr.Button("✏️ Edit Category")
-                     export_button = gr.Button("💾 Export")
-
-                 download_link = gr.HTML(label="📥 Download")
-
-             # Set up event handlers
-             process_button.click(
-                 process_uploaded_file,
-                 inputs=upload,
-                 outputs=[output_text, bookmark_display, bookmark_selector, bookmark_display_manage]
-             )
-
-             chat_button.click(
-                 chatbot_response,
-                 inputs=user_input,
-                 outputs=chat_output
-             )
-
-             delete_button.click(
-                 delete_selected_bookmarks,
-                 inputs=bookmark_selector,
-                 outputs=[manage_output, bookmark_selector, bookmark_display_manage]
-             )
-
-             edit_category_button.click(
-                 edit_selected_bookmarks_category,
-                 inputs=[bookmark_selector, new_category],
-                 outputs=[manage_output, bookmark_selector, bookmark_display_manage]
-             )
-
-             export_button.click(
-                 export_bookmarks,
-                 outputs=download_link
-             )

          logger.info("Launching Gradio app")
          demo.launch(debug=True)
  import logging
  import os
  import sys
+ import urllib.parse

  # Import OpenAI library
  import openai
 
  openai.api_key = GROQ_API_KEY
  openai.api_base = "https://api.groq.com/openai/v1"

+ def determine_page_type(soup, url):
+     """
+     Determine the type of webpage for better content extraction.
+     """
+     url_lower = url.lower()
+
+     # Check for common platforms
+     if 'facebook.com' in url_lower:
+         return 'social_media_profile'
+     elif 'wikipedia.org' in url_lower:
+         return 'wiki_article'
+     elif any(domain in url_lower for domain in ['news', 'huffpost', 'times']):
+         return 'news_article'
+     elif 'youtube.com' in url_lower:
+         return 'video_platform'
+     elif '.gov' in url_lower or 'government' in url_lower:
+         return 'government_site'
+     elif 'x.com' in url_lower or 'twitter.com' in url_lower:
+         return 'social_media_platform'
+
+     # Check page structure
+     if soup.find('article'):
+         return 'article'
+     elif soup.find(['shop', 'product', 'price']):
+         return 'ecommerce'
+     elif soup.find(['forum', 'comment', 'discussion']):
+         return 'forum'
+
+     return 'general'
+
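For illustration only, here is how the new dispatcher resolves a few inputs; URL checks win before structural checks, and the URLs below are placeholders:

    from bs4 import BeautifulSoup

    empty = BeautifulSoup('', 'html.parser')
    determine_page_type(empty, 'https://en.wikipedia.org/wiki/FAISS')  # -> 'wiki_article'
    determine_page_type(empty, 'https://x.com/someuser')               # -> 'social_media_platform'
    determine_page_type(BeautifulSoup('<article>text</article>', 'html.parser'),
                        'https://example.org/post')                    # -> 'article'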
+ def extract_main_content_by_type(soup, page_type):
+     """
+     Extract content based on page type for better relevance.
      """
      if not soup:
          return ""

+     content = ""
+
+     if page_type == 'news_article':
+         # Try to find the main article content
+         article_body = soup.find(['article', 'main', 'div'],
+                                  class_=lambda x: x and any(c in str(x).lower()
+                                                             for c in ['article', 'story', 'content', 'body']))
+         if article_body:
+             # Get first few paragraphs
+             paragraphs = article_body.find_all('p')
+             content = ' '.join(p.get_text() for p in paragraphs[:5])
+
+     elif page_type == 'wiki_article':
+         # For Wikipedia articles
+         content_div = soup.find('div', {'id': 'mw-content-text'})
+         if content_div:
+             paragraphs = content_div.find_all('p')
+             content = ' '.join(p.get_text() for p in paragraphs[:3])
+
+     elif page_type in ['social_media_profile', 'social_media_platform']:
+         # For social media pages
+         about_section = soup.find(['div', 'section'],
+                                   class_=lambda x: x and any(c in str(x).lower()
+                                                              for c in ['about', 'bio', 'profile', 'description']))
+         if about_section:
+             content = about_section.get_text()
+         else:
+             # Try to get main content area; guard against find() returning
+             # None before calling get_text(), so content stays a string
+             main_area = soup.find(['div', 'main'],
+                                   class_=lambda x: x and 'content' in str(x).lower())
+             if main_area:
+                 content = main_area.get_text()
+
+     # If no content found using specific extractors, use general extraction
+     if not content.strip():
+         # Remove unwanted elements
+         for element in soup(['script', 'style', 'nav', 'footer', 'header']):
+             element.decompose()
+
+         # Try to find main content area
+         main_content = soup.find(['main', 'article', 'div'],
+                                  class_=lambda x: x and 'content' in str(x).lower())
+         if main_content:
+             # Get all text from paragraphs
+             paragraphs = main_content.find_all('p')
+             content = ' '.join(p.get_text() for p in paragraphs)
          else:
              # Fallback to body content
+             content = soup.get_text()
+
+     # Clean the extracted content
+     content = clean_text(content)
+
+     return content[:5000]  # Limit content length
+
+ def clean_text(text):
+     """
+     Clean extracted text content.
+     """
+     if not text:
+         return ""
+
+     # Convert to string if necessary
+     text = str(text)
+
+     # Remove extra whitespace
+     text = re.sub(r'\s+', ' ', text)
+
+     # Remove special characters but keep basic punctuation
+     text = re.sub(r'[^\w\s.,!?-]', '', text)
+
+     # Remove multiple punctuation
+     text = re.sub(r'([.,!?])\1+', r'\1', text)
+
+     # Remove very short words (likely garbage)
+     text = ' '.join(word for word in text.split() if len(word) > 1)
+
+     return text.strip()
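A small sanity check of what clean_text does (whitespace collapse, character filtering, punctuation de-duplication, dropping one-character words), assuming the function as defined above:

    sample = "Hello,,  world!!! © 2024 a test"
    clean_text(sample)  # -> 'Hello, world! 2024 test'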
 
  def get_page_metadata(soup):
      """

      if not soup:
          return metadata

+     # Get title (try multiple sources)
      title_tag = soup.find('title')
+     og_title = soup.find('meta', {'property': 'og:title'})
+     twitter_title = soup.find('meta', {'name': 'twitter:title'})
+
      if title_tag and title_tag.string:
          metadata['title'] = title_tag.string.strip()
+     elif og_title and og_title.get('content'):
+         metadata['title'] = og_title.get('content').strip()
+     elif twitter_title and twitter_title.get('content'):
+         metadata['title'] = twitter_title.get('content').strip()

+     # Get meta description (try multiple sources)
+     desc_sources = [
+         ('meta', {'name': 'description'}),
+         ('meta', {'property': 'og:description'}),
+         ('meta', {'name': 'twitter:description'}),
+     ]
+
+     for tag, attrs in desc_sources:
+         desc = soup.find(tag, attrs=attrs)
+         if desc and desc.get('content'):
+             metadata['description'] = desc.get('content').strip()
+             break

+     # Get meta keywords
+     keywords_tag = soup.find('meta', {'name': 'keywords'})
+     if keywords_tag and keywords_tag.get('content'):
+         metadata['keywords'] = keywords_tag.get('content').strip()

      return metadata
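The fallback order matters here: <title> wins, then og:title, then twitter:title. A sketch, assuming `metadata` is initialized with empty-string defaults (its initialization sits outside this hunk):

    from bs4 import BeautifulSoup

    html = ('<head><meta property="og:title" content="OG Title">'
            '<meta name="description" content="A page."></head>')
    meta = get_page_metadata(BeautifulSoup(html, 'html.parser'))
    # With no <title> tag, og:title supplies the title:
    # meta['title'] == 'OG Title'; meta['description'] == 'A page.'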
 
+ def generate_contextual_summary(context):
+     """
+     Generate summary with context awareness using LLM.
+     """
+     page_type = context['page_type']
+
+     # Customize prompt based on page type
+     type_specific_prompts = {
+         'news_article': "This is a news article. Focus on the main news event, key facts, and significance.",
+         'wiki_article': "This is a Wikipedia article. Focus on the main topic, key facts, and historical context.",
+         'social_media_profile': "This is a social media profile. Focus on the platform's purpose and key features.",
+         'social_media_platform': "This is a social media platform. Describe its main purpose and unique features.",
+         'ecommerce': "This is an e-commerce site. Focus on what products/services are offered and target audience.",
+         'government_site': "This is a government website. Focus on services offered and public information provided.",
+         'video_platform': "This is a video platform. Describe its main purpose and content sharing features.",
+         'general': "Describe the main purpose and key features of this webpage."
+     }
+
+     prompt = f"""
+     Analyze this webpage and create a clear, factual summary:
+
+     Title: {context['title']}
+     Type: {page_type}
+     Description: {context['description']}
+     Keywords: {context['keywords']}
+
+     Additional Content:
+     {context['content'][:3000]}
+
+     {type_specific_prompts.get(page_type, type_specific_prompts['general'])}
+
+     Create a natural, informative 2-3 sentence summary that:
+     1. States the primary purpose/main topic
+     2. Mentions key features or information
+     3. Indicates target audience or use case (if clear)
+
+     Keep the tone professional and factual.
+     """
+
+     try:
+         response = openai.ChatCompletion.create(
+             model='llama3-8b-8192',
+             messages=[
+                 {"role": "system", "content": "You are a precise webpage summarizer that creates clear, accurate summaries."},
+                 {"role": "user", "content": prompt}
+             ],
+             max_tokens=150,
+             temperature=0.3,
+         )
+
+         return response['choices'][0]['message']['content'].strip()
+     except Exception as e:
+         logger.error(f"Error generating LLM summary: {e}")
+         return None
+
  def generate_summary(bookmark):
      """
      Generate a comprehensive summary for a bookmark using available content and LLM.
      """
+     logger.info(f"Generating summary for {bookmark.get('url')}")

      try:
          soup = BeautifulSoup(bookmark.get('html_content', ''), 'html.parser')

+         # 1. Extract all available metadata
          metadata = get_page_metadata(soup)

+         # 2. Determine page type and context
+         page_type = determine_page_type(soup, bookmark['url'])
+
+         # 3. Extract relevant content based on page type
+         main_content = extract_main_content_by_type(soup, page_type)
+
+         # 4. Generate summary using LLM with contextual awareness
          try:
+             context = {
+                 'title': metadata['title'] or bookmark.get('title', ''),
+                 'description': metadata['description'],
+                 'keywords': metadata['keywords'],
+                 'page_type': page_type,
+                 'content': main_content
+             }

+             summary = generate_contextual_summary(context)
+             if summary:
+                 bookmark['summary'] = summary
                  return bookmark

          except Exception as e:
+             logger.error(f"Error in LLM summary generation: {e}")
+
+         # Fallback mechanism
+         if metadata['description']:
+             bookmark['summary'] = metadata['description']
+         elif main_content:
+             bookmark['summary'] = ' '.join(main_content.split()[:50]) + '...'
+         else:
+             bookmark['summary'] = metadata.get('title', bookmark.get('title', 'No summary available.'))

      except Exception as e:
          logger.error(f"Error in generate_summary: {e}")
          bookmark['summary'] = bookmark.get('title', 'No summary available.')
+
+     return bookmark
  async def fetch_url_info(session, bookmark):
      """
+     Enhanced URL fetching with better error handling and request configuration.
      """
      url = bookmark['url']
      if url in fetch_cache:
          bookmark.update(fetch_cache[url])
          return bookmark

+     headers = {
+         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+         'Accept-Language': 'en-US,en;q=0.5',
+         'Accept-Encoding': 'gzip, deflate, br',
+         'Connection': 'keep-alive',
+         'Upgrade-Insecure-Requests': '1',
+         'Sec-Fetch-Dest': 'document',
+         'Sec-Fetch-Mode': 'navigate',
+         'Sec-Fetch-Site': 'none',
+         'Sec-Fetch-User': '?1',
+         'Cache-Control': 'max-age=0'
+     }
+
      try:
          logger.info(f"Fetching URL info for: {url}")
+         timeout = aiohttp.ClientTimeout(total=30)
+         async with session.get(
+             url,
+             timeout=timeout,
+             headers=headers,
+             ssl=False,
+             allow_redirects=True
+         ) as response:
+
+             status = response.status
+             bookmark['status_code'] = status
              bookmark['etag'] = response.headers.get('ETag', 'N/A')
+
+             # Handle different status codes
+             if status == 200:
+                 content = await response.text()
+                 bookmark['html_content'] = content
+                 bookmark['dead_link'] = False
+                 bookmark['description'] = ''  # Will be set by generate_summary
+                 logger.info(f"Successfully fetched content for {url}")
+             elif status in [301, 302, 307, 308]:
+                 # Handle redirects manually if needed
+                 bookmark['dead_link'] = False
                  bookmark['html_content'] = ''
+                 logger.info(f"Redirect detected for {url}")
              else:
+                 bookmark['dead_link'] = True
+                 bookmark['html_content'] = ''
+                 logger.warning(f"Non-success status {status} for {url}")
+
+     except asyncio.TimeoutError:
+         logger.warning(f"Timeout while fetching {url}")
+         bookmark['dead_link'] = False  # Don't mark as dead just because of timeout
+         bookmark['status_code'] = 'Timeout'
      except Exception as e:
+         logger.error(f"Error fetching {url}: {str(e)}")
+         bookmark['dead_link'] = False  # Don't mark as dead for other errors
+         bookmark['status_code'] = str(e)
      finally:
+         # Ensure all required fields are present
+         bookmark.setdefault('html_content', '')
+         bookmark.setdefault('description', '')
+         bookmark.setdefault('etag', 'N/A')
+
+         # Update cache
          fetch_cache[url] = {
              'etag': bookmark.get('etag'),
              'status_code': bookmark.get('status_code'),
              'description': bookmark.get('description'),
              'html_content': bookmark.get('html_content', '')
          }
+
      return bookmark
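A one-off call of the coroutine above might look like this (a sketch; it assumes the module-level fetch_cache and logger exist, and example.com is a placeholder):

    import asyncio
    import aiohttp

    async def demo():
        bm = {'url': 'https://example.com'}
        async with aiohttp.ClientSession() as session:
            await fetch_url_info(session, bm)
        print(bm['status_code'], bm['dead_link'])

    asyncio.run(demo())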
 
  async def process_bookmarks_async(bookmarks_list):
      """
+     Process all bookmarks asynchronously with improved error handling.
      """
      logger.info("Processing bookmarks asynchronously")
      try:
+         # Configure connection pool and timeout
+         tcp_connector = aiohttp.TCPConnector(
+             limit=5,                     # Limit concurrent connections
+             force_close=True,            # Force close connections
+             enable_cleanup_closed=True,  # Clean up closed connections
+             ssl=False                    # Disable SSL verification
+         )
+
+         timeout = aiohttp.ClientTimeout(total=30)
+
+         async with aiohttp.ClientSession(
+             connector=tcp_connector,
+             timeout=timeout,
+             raise_for_status=False  # Don't raise exceptions for non-200 status
+         ) as session:
              tasks = []
              for bookmark in bookmarks_list:
                  task = asyncio.ensure_future(fetch_url_info(session, bookmark))
                  tasks.append(task)
+
+             # Process bookmarks in batches to avoid overwhelming servers
+             batch_size = 5
+             for i in range(0, len(tasks), batch_size):
+                 batch = tasks[i:i + batch_size]
+                 await asyncio.gather(*batch)
+                 await asyncio.sleep(1)  # Small delay between batches
+
          logger.info("Completed processing bookmarks asynchronously")
      except Exception as e:
          logger.error(f"Error in asynchronous processing of bookmarks: {e}")
          raise
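One caveat worth noting: asyncio.ensure_future schedules every task as soon as it is created, so the batch loop above staggers the awaiting, not the starting, of the requests; the effective throttle is the connector's limit=5. A sketch that starts work lazily per batch instead:

    async def fetch_in_batches(session, bookmarks_list, batch_size=5):
        for i in range(0, len(bookmarks_list), batch_size):
            coros = [fetch_url_info(session, bm)   # bare coroutines, not yet scheduled
                     for bm in bookmarks_list[i:i + batch_size]]
            await asyncio.gather(*coros)           # scheduled and awaited here
            await asyncio.sleep(1)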
 
+ def parse_bookmarks(file_content):
+     """
+     Parse bookmarks from HTML file with enhanced error handling.
+     """
+     logger.info("Parsing bookmarks")
+     try:
+         soup = BeautifulSoup(file_content, 'html.parser')
+         extracted_bookmarks = []
+
+         # Find all bookmark links
+         for link in soup.find_all('a'):
+             url = link.get('href', '').strip()
+             title = link.text.strip()
+
+             # Validate URL and title
+             if url and title and url.startswith(('http://', 'https://')):
+                 # Clean and normalize URL
+                 parsed_url = urllib.parse.urlparse(url)
+                 normalized_url = urllib.parse.urlunparse(parsed_url)
+
+                 bookmark = {
+                     'url': normalized_url,
+                     'title': title,
+                     'add_date': link.get('add_date', ''),
+                     'icon': link.get('icon', '')
+                 }
+                 extracted_bookmarks.append(bookmark)
+
+         logger.info(f"Extracted {len(extracted_bookmarks)} valid bookmarks")
+         return extracted_bookmarks
+     except Exception as e:
+         logger.error(f"Error parsing bookmarks: {e}")
+         raise
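Note that the urlparse/urlunparse round-trip validates structure but is nearly an identity transform; it lowercases the scheme, and does not lowercase hosts or strip fragments:

    import urllib.parse

    url = 'HTTPS://Example.com/path?q=1#frag'
    urllib.parse.urlunparse(urllib.parse.urlparse(url))
    # -> 'https://Example.com/path?q=1#frag' (only the scheme is normalized)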
 
  def vectorize_and_index(bookmarks_list):
      """

      """
      logger.info("Vectorizing summaries and building FAISS index")
      try:
+         # Prepare summaries for vectorization
+         summaries = []
+         for bookmark in bookmarks_list:
+             summary = bookmark.get('summary', '').strip()
+             title = bookmark.get('title', '').strip()
+             # Combine title and summary for better embedding
+             text = f"{title} {summary}".strip()
+             summaries.append(text if text else "No content available")
+
+         # Generate embeddings
          embeddings = embedding_model.encode(summaries)
+
+         # Create and configure FAISS index
          dimension = embeddings.shape[1]
          faiss_idx = faiss.IndexFlatL2(dimension)
+
+         # Add vectors to index
          faiss_idx.add(np.array(embeddings))
+
          logger.info("FAISS index built successfully")
          return faiss_idx, embeddings
      except Exception as e:
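Once built, the IndexFlatL2 can be queried with the same embedding model; a minimal sketch assuming the module globals faiss_index, bookmarks, and embedding_model (lower L2 distance means a closer match):

    def search_bookmarks(query, k=5):
        query_vec = embedding_model.encode([query]).astype('float32')
        distances, indices = faiss_index.search(query_vec, min(k, len(bookmarks)))
        return [(bookmarks[i], float(d))
                for d, i in zip(distances[0], indices[0]) if i != -1]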
 
  def display_bookmarks():
      """
+     Generate HTML display for bookmarks with enhanced styling.
      """
      logger.info("Generating HTML display for bookmarks")
      cards = ''

          etag = bookmark.get('etag', 'N/A')
          summary = bookmark.get('summary', '')
          category = bookmark.get('category', 'Uncategorized')
+         status_code = bookmark.get('status_code', 'N/A')

+         # Enhanced styling based on status
          if bookmark.get('dead_link'):
+             card_style = "border: 2px solid #ff4444; background-color: rgba(255, 68, 68, 0.1);"
+             text_style = "color: #ff4444;"
          else:
+             card_style = "border: 2px solid #00C851; background-color: rgba(0, 200, 81, 0.1);"
              text_style = "color: var(--text-color);"

          card_html = f'''
+         <div class="card" style="{card_style} padding: 15px; margin: 15px 0; border-radius: 8px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);">
              <div class="card-content">
+                 <h3 style="{text_style} margin-bottom: 10px; font-size: 1.2em;">
+                     {index}. {title} {status}
+                     {f'<span style="font-size: 0.8em; color: #666;">({status_code})</span>' if status_code != 'N/A' else ''}
+                 </h3>
                  <p style="{text_style}"><strong>Category:</strong> {category}</p>
                  <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
                  <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
              </div>
          '''
          cards += card_html
+
+     # Add container with max width and padding
+     display_html = f'''
+     <div style="max-width: 1200px; margin: 0 auto; padding: 20px;">
+         {cards}
+     </div>
+     '''
+
      logger.info("HTML display generated")
+     return display_html
  def process_uploaded_file(file):
      """
+     Process the uploaded bookmarks file with enhanced error handling and user feedback.
      """
      global bookmarks, faiss_index
      logger.info("Processing uploaded file")

      if file is None:
+         return "⚠️ Please upload a bookmarks HTML file.", '', gr.update(choices=[]), display_bookmarks()

      try:
          file_content = file.decode('utf-8')
      except UnicodeDecodeError as e:
+         logger.error(f"Error decoding file: {e}")
+         return "⚠️ Error decoding file. Please ensure it's a valid HTML file.", '', gr.update(choices=[]), display_bookmarks()

      try:
          bookmarks = parse_bookmarks(file_content)
      except Exception as e:
          logger.error(f"Error parsing bookmarks: {e}")
+         return "⚠️ Error parsing the bookmarks file.", '', gr.update(choices=[]), display_bookmarks()

      if not bookmarks:
+         return "⚠️ No valid bookmarks found in the file.", '', gr.update(choices=[]), display_bookmarks()

      try:
+         logger.info("Processing bookmarks...")
          asyncio.run(process_bookmarks_async(bookmarks))
+
+         # Process in batches for progress tracking
+         total = len(bookmarks)
+         for i, bookmark in enumerate(bookmarks, 1):
+             generate_summary(bookmark)
+             assign_category(bookmark)
+             logger.info(f"Processed bookmark {i}/{total}")

          faiss_index, embeddings = vectorize_and_index(bookmarks)
+
+         message = f"✅ Successfully processed {len(bookmarks)} bookmarks!"
+         choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
+                    for i, bookmark in enumerate(bookmarks)]
+
+         bookmark_html = display_bookmarks()
+         return message, bookmark_html, gr.update(choices=choices), bookmark_html

+     except Exception as e:
+         logger.error(f"Error processing bookmarks: {e}")
+         return "⚠️ Error processing bookmarks. Please try again.", '', gr.update(choices=[]), display_bookmarks()
  def delete_selected_bookmarks(selected_indices):
      """
+     Delete selected bookmarks with enhanced error handling.
      """
      global bookmarks, faiss_index
+
      if not selected_indices:
          return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()

+     try:
+         indices = [int(s.split('.')[0])-1 for s in selected_indices]
+         indices = sorted(indices, reverse=True)
+         deleted_count = 0
+
+         for idx in indices:
+             if 0 <= idx < len(bookmarks):
+                 logger.info(f"Deleting bookmark: {bookmarks[idx]['title']}")
+                 bookmarks.pop(idx)
+                 deleted_count += 1
+
+         if bookmarks:
+             faiss_index, embeddings = vectorize_and_index(bookmarks)
+         else:
+             faiss_index = None

+         message = f"✅ Successfully deleted {deleted_count} bookmark{'s' if deleted_count != 1 else ''}."
+         choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
+                    for i, bookmark in enumerate(bookmarks)]
+
+         return message, gr.update(choices=choices), display_bookmarks()
+     except Exception as e:
+         logger.error(f"Error deleting bookmarks: {e}")
+         return "⚠️ Error deleting bookmarks.", gr.update(choices=[]), display_bookmarks()
  def edit_selected_bookmarks_category(selected_indices, new_category):
      """
+     Edit category of selected bookmarks with enhanced error handling.
      """
      if not selected_indices:
          return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
      if not new_category:
          return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks()

+     try:
+         indices = [int(s.split('.')[0])-1 for s in selected_indices]
+         updated_count = 0
+
+         for idx in indices:
+             if 0 <= idx < len(bookmarks):
+                 old_category = bookmarks[idx]['category']
+                 bookmarks[idx]['category'] = new_category
+                 logger.info(f"Updated category for '{bookmarks[idx]['title']}' from '{old_category}' to '{new_category}'")
+                 updated_count += 1
+
+         message = f"✅ Updated category for {updated_count} bookmark{'s' if updated_count != 1 else ''}."
+         choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
+                    for i, bookmark in enumerate(bookmarks)]
+
+         return message, gr.update(choices=choices), display_bookmarks()
+     except Exception as e:
+         logger.error(f"Error updating categories: {e}")
+         return "⚠️ Error updating categories.", gr.update(choices=[]), display_bookmarks()
  def export_bookmarks():
      """
+     Export bookmarks to HTML file with enhanced formatting.
      """
      if not bookmarks:
          return "⚠️ No bookmarks to export."

      try:
+         logger.info("Exporting bookmarks")
+         soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1>", 'html.parser')
+
+         # Add metadata
+         meta = soup.new_tag('META')
+         meta['HTTP-EQUIV'] = 'Content-Type'
+         meta['CONTENT'] = 'text/html; charset=UTF-8'
+         soup.append(meta)
+
+         # Add title
+         title = soup.new_tag('TITLE')
+         title.string = 'Bookmarks'
+         soup.append(title)
+
+         # Add heading
+         h1 = soup.new_tag('H1')
+         h1.string = 'Bookmarks'
+         soup.append(h1)
+
+         # Create main bookmark list
          dl = soup.new_tag('DL')
+         soup.append(dl)
+
+         # Add bookmarks with categories
+         current_category = None
          for bookmark in bookmarks:
+             category = bookmark.get('category', 'Uncategorized')
+
+             # Create category folder if needed
+             if category != current_category:
+                 current_category = category
+                 dt_cat = soup.new_tag('DT')
+                 h3_cat = soup.new_tag('H3')
+                 h3_cat.string = category
+                 dt_cat.append(h3_cat)
+                 dl_cat = soup.new_tag('DL')
+                 dt_cat.append(dl_cat)
+                 dl.append(dt_cat)
+
+             # Add bookmark
              dt = soup.new_tag('DT')
              a = soup.new_tag('A', href=bookmark['url'])
+             if 'add_date' in bookmark:
+                 a['ADD_DATE'] = bookmark['add_date']
+             if 'icon' in bookmark:
+                 a['ICON'] = bookmark['icon']
              a.string = bookmark['title']
              dt.append(a)
+             dl_cat.append(dt)
+
          html_content = str(soup)
          b64 = base64.b64encode(html_content.encode()).decode()
          href = f'data:text/html;base64,{b64}'
+
          logger.info("Bookmarks exported successfully")
+         return f'''
+         <div style="text-align: center;">
+             <a href="{href}"
+                download="bookmarks.html"
+                style="display: inline-block;
+                       padding: 10px 20px;
+                       background-color: #4CAF50;
+                       color: white;
+                       text-decoration: none;
+                       border-radius: 5px;
+                       margin: 10px;">
+                 💾 Download Exported Bookmarks
+             </a>
+         </div>
+         '''
      except Exception as e:
          logger.error(f"Error exporting bookmarks: {e}")
          return "⚠️ Error exporting bookmarks."
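Because a new folder is opened whenever the category changes between consecutive bookmarks, unsorted input can produce duplicate folders; sorting first keeps each category in a single folder (a suggestion, not part of the commit):

    bookmarks.sort(key=lambda bm: bm.get('category', 'Uncategorized'))
    # The emitted Netscape-format structure then resembles:
    #   <DL><DT><H3>Technology</H3>
    #       <DL><DT><A HREF="https://example.com" ADD_DATE="...">Example</A></DL>
    #   </DL>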
 
  def chatbot_response(user_query):
      """
+     Generate chatbot response with enhanced context understanding.
      """
      if not GROQ_API_KEY:
          return "⚠️ API key not set. Please set the GROQ_API_KEY environment variable."

      if not bookmarks:
          return "⚠️ No bookmarks available. Please upload and process your bookmarks first."

+     logger.info(f"Processing query: {user_query}")

      try:
+         # Get relevant bookmarks using FAISS
+         query_embedding = embedding_model.encode([user_query]).astype('float32')
+         k = min(5, len(bookmarks))  # Get top 5 or all if less than 5
+         D, I = faiss_index.search(query_embedding, k)
+
+         relevant_bookmarks = []
+         for idx in I[0]:
+             if idx != -1:  # Valid index
+                 bookmark_data = bookmarks[idx]
+                 relevant_bookmarks.append({
+                     'title': bookmark_data['title'],
+                     'url': bookmark_data['url'],
+                     'summary': bookmark_data['summary'],
+                     'category': bookmark_data['category']
+                 })
+
+         # Prepare context for LLM
+         bookmark_descriptions = []
+         for i, bm in enumerate(relevant_bookmarks, 1):
+             desc = f"{i}. Title: {bm['title']}\n   URL: {bm['url']}\n   Category: {bm['category']}\n   Summary: {bm['summary']}"
+             bookmark_descriptions.append(desc)
+
+         bookmark_context = '\n\n'.join(bookmark_descriptions)

          prompt = f"""
+         User Query: {user_query}

+         Relevant Bookmarks:
+         {bookmark_context}

+         Please provide a helpful response that:
+         1. Identifies the most relevant bookmarks for the query
+         2. Explains why each bookmark might be useful
+         3. Suggests how the user might use these resources

+         Format the response in a clear, readable way with appropriate spacing and structure.
          """

          response = openai.ChatCompletion.create(
              model='llama3-8b-8192',
              messages=[
+                 {"role": "system", "content": "You are a helpful assistant that finds and explains relevant bookmarks."},
                  {"role": "user", "content": prompt}
              ],
              max_tokens=500,

          )

          answer = response['choices'][0]['message']['content'].strip()
+         logger.info("Generated response successfully")
          return answer

      except Exception as e:
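One detail in the hunk above: joining the descriptions outside the prompt f-string avoids a syntax error on Python < 3.12, where f-string expressions may not contain backslashes:

    bookmark_context = '\n\n'.join(bookmark_descriptions)  # computed first
    prompt = f"User Query: {user_query}\n\nRelevant Bookmarks:\n{bookmark_context}"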
 
  def build_app():
      """
+     Build and launch the Gradio app with enhanced UI and functionality.
      """
      try:
          logger.info("Building Gradio app")
          with gr.Blocks(css="app.css") as demo:
+             # ... [Rest of the UI code remains the same as before] ...

          logger.info("Launching Gradio app")
          demo.launch(debug=True)