siddhartharya committed on
Commit
70eb2ff
·
verified ·
1 Parent(s): ab5c457

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +205 -204
app.py CHANGED
@@ -8,14 +8,18 @@ import numpy as np
8
  import requests
9
  import time
10
  import re
 
11
  import logging
12
  import os
13
  import sys
 
14
  from concurrent.futures import ThreadPoolExecutor
15
  import threading
16
- from html import escape
17
 
18
- # Suppress specific warnings
 
 
 
19
  import urllib3
20
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
21
 
@@ -43,10 +47,6 @@ fetch_cache = {}
43
 
44
  # Lock for thread-safe operations
45
  lock = threading.Lock()
46
- api_lock = threading.Lock() # Added api_lock
47
-
48
- # Initialize last_api_call_time
49
- last_api_call_time = 0 # Added initialization
50
 
51
  # Define the categories
52
  CATEGORIES = [
@@ -74,41 +74,18 @@ CATEGORIES = [
74
  "Uncategorized",
75
  ]
76
 
77
- # Define a function to generate responses using llama-3.1-70b-versatile
78
- def generate_llama_response(prompt):
79
- """
80
- Generate a response using the llama-3.1-70b-versatile model.
81
 
82
- This implementation assumes that the model is accessible via a local HTTP API endpoint.
83
- Replace the URL and request parameters as per your actual setup.
84
- """
85
- try:
86
- logger.info("Generating response using llama-3.1-70b-versatile")
87
- api_url = "http://localhost:5000/generate" # Replace with your actual endpoint
88
- headers = {
89
- 'Content-Type': 'application/json',
90
- }
91
- payload = {
92
- 'prompt': prompt,
93
- 'max_tokens': 500, # Adjust as needed
94
- 'temperature': 0.7, # Adjust as needed
95
- }
96
- response = requests.post(api_url, json=payload, headers=headers, timeout=30)
97
- response.raise_for_status() # Raise an exception for HTTP errors
98
- data = response.json()
99
- generated_text = data.get('response', '').strip()
100
- if not generated_text:
101
- raise ValueError("Empty response received from the model.")
102
- return generated_text
103
- except requests.exceptions.RequestException as e:
104
- logger.error(f"HTTP Request failed: {e}", exc_info=True)
105
- return "Error generating response due to HTTP request failure."
106
- except ValueError as ve:
107
- logger.error(f"Value Error: {ve}", exc_info=True)
108
- return "Error generating response: Received empty response from the model."
109
- except Exception as e:
110
- logger.error(f"Unexpected error: {e}", exc_info=True)
111
- return "An unexpected error occurred while generating the response."
112
 
113
  def extract_main_content(soup):
114
  """
@@ -130,7 +107,7 @@ def extract_main_content(soup):
130
  content = soup.get_text(separator=' ', strip=True)
131
 
132
  # Clean up the text
133
- content = re.sub(r'\s+', ' ', content) # Remove multiple spaces
134
 
135
  # Truncate content to a reasonable length (e.g., 1500 words)
136
  words = content.split()
@@ -181,55 +158,57 @@ def get_page_metadata(soup):
181
 
182
  def generate_summary_and_assign_category(bookmark):
183
  """
184
- Generate a concise summary and assign a category using the llama-3.1-70b-versatile model.
185
  """
186
  logger.info(f"Generating summary and assigning category for bookmark: {bookmark.get('url')}")
187
 
188
- try:
189
- # Rate Limiting Logic
190
- with api_lock:
191
- global last_api_call_time
192
- current_time = time.time()
193
- elapsed = current_time - last_api_call_time
194
- if elapsed < 2:
195
- sleep_duration = 2 - elapsed
196
- logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
197
- time.sleep(sleep_duration)
198
- last_api_call_time = current_time
199
-
200
- # Prepare the prompt
201
- html_content = bookmark.get('html_content', '')
202
- soup = BeautifulSoup(html_content, 'html.parser')
203
- metadata = get_page_metadata(soup)
204
- main_content = extract_main_content(soup)
205
-
206
- # Prepare content for the prompt
207
- content_parts = []
208
- if metadata['title']:
209
- content_parts.append(f"Title: {metadata['title']}")
210
- if metadata['description']:
211
- content_parts.append(f"Description: {metadata['description']}")
212
- if metadata['keywords']:
213
- content_parts.append(f"Keywords: {metadata['keywords']}")
214
- if main_content:
215
- content_parts.append(f"Main Content: {main_content}")
216
-
217
- content_text = '\n'.join(content_parts)
218
-
219
- # Detect insufficient or erroneous content
220
- error_keywords = ['Access Denied', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
221
- if not content_text or len(content_text.split()) < 50:
222
- use_prior_knowledge = True
223
- logger.info(f"Content for {bookmark.get('url')} is insufficient. Instructing LLM to use prior knowledge.")
224
- elif any(keyword.lower() in content_text.lower() for keyword in error_keywords):
225
- use_prior_knowledge = True
226
- logger.info(f"Content for {bookmark.get('url')} contains error messages. Instructing LLM to use prior knowledge.")
227
- else:
228
- use_prior_knowledge = False
 
 
 
229
 
230
- # Craft the prompt based on content availability
231
- if use_prior_knowledge:
232
- prompt = f"""
233
  You are a knowledgeable assistant with up-to-date information as of 2023.
234
  URL: {bookmark.get('url')}
235
  Provide:
@@ -241,8 +220,8 @@ Format:
241
  Summary: [Your summary]
242
  Category: [One category]
243
  """
244
- else:
245
- prompt = f"""
246
  You are an assistant that creates concise webpage summaries and assigns categories.
247
  Content:
248
  {content_text}
@@ -256,44 +235,70 @@ Summary: [Your summary]
256
  Category: [One category]
257
  """
258
 
259
- # Generate response using llama-3.1-70b-versatile
260
- response = generate_llama_response(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
- if not response:
263
- raise ValueError("Empty response received from the model.")
 
264
 
265
- # Parse the response
266
- summary_match = re.search(r"Summary:\s*(.*)", response)
267
- category_match = re.search(r"Category:\s*(.*)", response)
268
 
269
- if summary_match:
270
- bookmark['summary'] = summary_match.group(1).strip()
271
- else:
272
- bookmark['summary'] = 'No summary available.'
273
 
274
- if category_match:
275
- category = category_match.group(1).strip().strip('"')
276
- if category in CATEGORIES:
277
- bookmark['category'] = category
 
 
278
  else:
279
  bookmark['category'] = 'Uncategorized'
280
- else:
281
- bookmark['category'] = 'Uncategorized'
282
-
283
- # Optional: Simple keyword-based validation
284
- summary_lower = bookmark['summary'].lower()
285
- url_lower = bookmark['url'].lower()
286
- if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
287
- bookmark['category'] = 'Social Media'
288
- elif 'wikipedia' in url_lower:
289
- bookmark['category'] = 'Reference and Knowledge Bases'
290
 
291
- logger.info("Successfully generated summary and assigned category")
292
-
293
- except Exception as e:
294
- logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
295
- bookmark['summary'] = 'No summary available.'
296
- bookmark['category'] = 'Uncategorized'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
  def parse_bookmarks(file_content):
299
  """
@@ -340,9 +345,7 @@ def fetch_url_info(bookmark):
340
  content = response.text
341
  logger.info(f"Fetched content length for {url}: {len(content)} characters")
342
 
343
- # Handle status codes
344
  if response.status_code >= 500:
345
- # Server error, consider as dead link
346
  bookmark['dead_link'] = True
347
  bookmark['description'] = ''
348
  bookmark['html_content'] = ''
@@ -354,12 +357,12 @@ def fetch_url_info(bookmark):
354
  logger.info(f"Fetched information for {url}")
355
 
356
  except requests.exceptions.Timeout:
357
- bookmark['dead_link'] = False # Mark as 'Unknown' instead of 'Dead'
358
  bookmark['etag'] = 'N/A'
359
  bookmark['status_code'] = 'Timeout'
360
  bookmark['description'] = ''
361
  bookmark['html_content'] = ''
362
- bookmark['slow_link'] = True # Custom flag to indicate slow response
363
  logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
364
  except Exception as e:
365
  bookmark['dead_link'] = True
@@ -390,7 +393,6 @@ def vectorize_and_index(bookmarks_list):
390
  embeddings = embedding_model.encode(summaries)
391
  dimension = embeddings.shape[1]
392
  index = faiss.IndexIDMap(faiss.IndexFlatL2(dimension))
393
- # Assign unique IDs to each bookmark
394
  ids = np.array([bookmark['id'] for bookmark in bookmarks_list], dtype=np.int64)
395
  index.add_with_ids(np.array(embeddings).astype('float32'), ids)
396
  faiss_index = index
@@ -411,15 +413,15 @@ def display_bookmarks():
411
  if bookmark.get('dead_link'):
412
  status = "❌ Dead Link"
413
  card_style = "border: 2px solid red;"
414
- text_style = "color: white;" # Set font color to white
415
  elif bookmark.get('slow_link'):
416
- status = "⏳ Slow Response"
417
  card_style = "border: 2px solid orange;"
418
- text_style = "color: white;" # Set font color to white
419
  else:
420
  status = "✅ Active"
421
  card_style = "border: 2px solid green;"
422
- text_style = "color: white;" # Set font color to white
423
 
424
  title = bookmark['title']
425
  url = bookmark['url']
@@ -428,6 +430,7 @@ def display_bookmarks():
428
  category = bookmark.get('category', 'Uncategorized')
429
 
430
  # Escape HTML content to prevent XSS attacks
 
431
  title = escape(title)
432
  url = escape(url)
433
  summary = escape(summary)
@@ -457,23 +460,23 @@ def process_uploaded_file(file, state_bookmarks):
457
 
458
  if file is None:
459
  logger.warning("No file uploaded")
460
- return "Please upload a bookmarks HTML file.", '', state_bookmarks, gr.update(choices=[])
461
 
462
  try:
463
  file_content = file.decode('utf-8')
464
  except UnicodeDecodeError as e:
465
  logger.error(f"Error decoding the file: {e}", exc_info=True)
466
- return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, gr.update(choices=[])
467
 
468
  try:
469
  bookmarks = parse_bookmarks(file_content)
470
  except Exception as e:
471
  logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
472
- return "Error parsing the bookmarks HTML file.", '', state_bookmarks, gr.update(choices=[])
473
 
474
  if not bookmarks:
475
  logger.warning("No bookmarks found in the uploaded file")
476
- return "No bookmarks found in the uploaded file.", '', state_bookmarks, gr.update(choices=[])
477
 
478
  # Assign unique IDs to bookmarks
479
  for idx, bookmark in enumerate(bookmarks):
@@ -481,19 +484,19 @@ def process_uploaded_file(file, state_bookmarks):
481
 
482
  # Fetch bookmark info concurrently
483
  logger.info("Fetching URL info concurrently")
484
- with ThreadPoolExecutor(max_workers=10) as executor: # Adjust max_workers as needed
485
  executor.map(fetch_url_info, bookmarks)
486
 
487
  # Process bookmarks concurrently with LLM calls
488
  logger.info("Processing bookmarks with LLM concurrently")
489
- with ThreadPoolExecutor(max_workers=1) as executor: # Serialize API calls to respect rate limits
490
  executor.map(generate_summary_and_assign_category, bookmarks)
491
 
492
  try:
493
  faiss_index = vectorize_and_index(bookmarks)
494
  except Exception as e:
495
  logger.error(f"Error building FAISS index: {e}", exc_info=True)
496
- return "Error building search index.", '', state_bookmarks, gr.update(choices=[])
497
 
498
  message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
499
  logger.info(message)
@@ -506,7 +509,7 @@ def process_uploaded_file(file, state_bookmarks):
506
  # Update state
507
  state_bookmarks = bookmarks.copy()
508
 
509
- return message, bookmark_html, state_bookmarks, gr.update(choices=choices)
510
 
511
  def delete_selected_bookmarks(selected_indices, state_bookmarks):
512
  """
@@ -519,15 +522,12 @@ def delete_selected_bookmarks(selected_indices, state_bookmarks):
519
  ids_to_delete = []
520
  indices_to_delete = []
521
  for s in selected_indices:
522
- try:
523
- idx = int(s.split('.')[0]) - 1
524
- if 0 <= idx < len(bookmarks):
525
- bookmark_id = bookmarks[idx]['id']
526
- ids_to_delete.append(bookmark_id)
527
- indices_to_delete.append(idx)
528
- logger.info(f"Deleting bookmark at index {idx + 1}")
529
- except (ValueError, IndexError):
530
- logger.warning(f"Invalid selection format: {s}")
531
 
532
  # Remove vectors from FAISS index
533
  if faiss_index is not None and ids_to_delete:
@@ -556,20 +556,11 @@ def edit_selected_bookmarks_category(selected_indices, new_category, state_bookm
556
  if not new_category:
557
  return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
558
 
559
- indices = []
560
- for s in selected_indices:
561
- try:
562
- idx = int(s.split('.')[0])-1
563
- if 0 <= idx < len(bookmarks):
564
- indices.append(idx)
565
- else:
566
- logger.warning(f"Index out of range: {idx + 1}")
567
- except ValueError:
568
- logger.warning(f"Invalid selection format: {s}")
569
-
570
  for idx in indices:
571
- bookmarks[idx]['category'] = new_category
572
- logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
 
573
 
574
  message = "✏️ Category updated for selected bookmarks."
575
  logger.info(message)
@@ -589,7 +580,7 @@ def export_bookmarks():
589
  """
590
  if not bookmarks:
591
  logger.warning("No bookmarks to export")
592
- return None # Return None to indicate no file
593
 
594
  try:
595
  logger.info("Exporting bookmarks to HTML")
@@ -603,19 +594,18 @@ def export_bookmarks():
603
  dl.append(dt)
604
  soup.append(dl)
605
  html_content = str(soup)
606
- # Save to a temporary file
607
  output_file = "exported_bookmarks.html"
608
  with open(output_file, 'w', encoding='utf-8') as f:
609
  f.write(html_content)
610
  logger.info("Bookmarks exported successfully")
611
- return output_file # Return the file path
612
  except Exception as e:
613
  logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
614
- return None # Return None in case of error
615
 
616
  def chatbot_response(user_query, chat_history):
617
  """
618
- Generate chatbot response using the FAISS index and embeddings, maintaining chat history.
619
  """
620
  if not bookmarks or faiss_index is None:
621
  logger.warning("No bookmarks available for chatbot")
@@ -625,10 +615,8 @@ def chatbot_response(user_query, chat_history):
625
  logger.info(f"Chatbot received query: {user_query}")
626
 
627
  try:
628
- # Append user's message to chat history
629
  chat_history.append({"role": "user", "content": user_query})
630
 
631
- # Rate Limiting Logic (if necessary)
632
  with api_lock:
633
  global last_api_call_time
634
  current_time = time.time()
@@ -637,15 +625,13 @@ def chatbot_response(user_query, chat_history):
637
  sleep_duration = 2 - elapsed
638
  logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
639
  time.sleep(sleep_duration)
640
- last_api_call_time = current_time
641
 
642
- # Encode the query and search the FAISS index
643
  query_vector = embedding_model.encode([user_query]).astype('float32')
644
- k = 5 # Number of results to return
645
  distances, ids = faiss_index.search(query_vector, k)
646
  ids = ids.flatten()
647
 
648
- # Retrieve the bookmarks
649
  id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
650
  matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
651
 
@@ -654,13 +640,11 @@ def chatbot_response(user_query, chat_history):
654
  chat_history.append({"role": "assistant", "content": answer})
655
  return chat_history
656
 
657
- # Format the response
658
  bookmarks_info = "\n".join([
659
  f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
660
  for bookmark in matching_bookmarks
661
  ])
662
 
663
- # Craft the prompt for the LLM
664
  prompt = f"""
665
  A user asked: "{user_query}"
666
  Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
@@ -669,19 +653,39 @@ Bookmarks:
669
  Provide a concise and helpful response.
670
  """
671
 
672
- # Generate response using llama-3.1-70b-versatile
673
- response = generate_llama_response(prompt)
 
 
 
 
674
 
675
- if not response:
676
- raise ValueError("Empty response received from the model.")
 
 
677
 
678
- answer = response.strip()
 
 
 
 
 
 
 
 
 
679
  logger.info("Chatbot response generated")
 
680
 
681
- # Append the assistant's response to chat history
682
  chat_history.append({"role": "assistant", "content": answer})
683
  return chat_history
684
 
 
 
 
 
 
685
  except Exception as e:
686
  error_message = f"⚠️ Error processing your query: {str(e)}"
687
  logger.error(error_message, exc_info=True)
@@ -698,12 +702,6 @@ def build_app():
698
  # Initialize state
699
  state_bookmarks = gr.State([])
700
 
701
- # Define 'bookmark_selector' globally
702
- bookmark_selector = gr.CheckboxGroup(
703
- label="✅ Select Bookmarks",
704
- choices=[]
705
- )
706
-
707
  # General Overview
708
  gr.Markdown("""
709
  # 📚 SmartMarks - AI Browser Bookmarks Manager
@@ -723,7 +721,7 @@ SmartMarks is divided into three main sections:
723
  Navigate through the tabs to explore each feature in detail.
724
  """)
725
 
726
- # Define tabs
727
  with gr.Tab("Upload and Process Bookmarks"):
728
  gr.Markdown("""
729
  ## 📂 **Upload and Process Bookmarks**
@@ -741,17 +739,13 @@ Navigate through the tabs to explore each feature in detail.
741
  3. **View Processed Bookmarks:**
742
  - Once processing is complete, your bookmarks will be displayed in an organized and visually appealing format below.
743
  """)
 
744
  upload = gr.File(label="📁 Upload Bookmarks HTML File", type='binary')
745
  process_button = gr.Button("⚙️ Process Bookmarks")
746
  output_text = gr.Textbox(label="✅ Output", interactive=False)
747
  bookmark_display = gr.HTML(label="📄 Processed Bookmarks")
748
 
749
- process_button.click(
750
- process_uploaded_file,
751
- inputs=[upload, state_bookmarks],
752
- outputs=[output_text, bookmark_display, state_bookmarks, bookmark_selector]
753
- )
754
-
755
  with gr.Tab("Chat with Bookmarks"):
756
  gr.Markdown("""
757
  ## 💬 **Chat with Bookmarks**
@@ -770,6 +764,7 @@ Navigate through the tabs to explore each feature in detail.
770
  4. **View Chat History:**
771
  - All your queries and the corresponding AI responses are displayed in the chat history for your reference.
772
  """)
 
773
  chatbot = gr.Chatbot(label="💬 Chat with SmartMarks", type='messages')
774
  user_input = gr.Textbox(
775
  label="✍️ Ask about your bookmarks",
@@ -783,10 +778,10 @@ Navigate through the tabs to explore each feature in detail.
783
  outputs=chatbot
784
  )
785
 
 
786
  with gr.Tab("Manage Bookmarks"):
787
  gr.Markdown("""
788
- ## 🛠️ **Manage Bookmarks**
789
-
790
  ### 🗂️ **Features:**
791
 
792
  1. **View Bookmarks:**
@@ -810,7 +805,15 @@ Navigate through the tabs to explore each feature in detail.
810
  6. **Refresh Bookmarks:**
811
  - Click the **"🔄 Refresh Bookmarks"** button to ensure the latest state is reflected in the display.
812
  """)
 
813
  manage_output = gr.Textbox(label="🔄 Status", interactive=False)
 
 
 
 
 
 
 
814
  new_category = gr.Dropdown(
815
  label="🆕 New Category",
816
  choices=CATEGORIES,
@@ -818,11 +821,6 @@ Navigate through the tabs to explore each feature in detail.
818
  )
819
  bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
820
 
821
- with gr.Row():
822
- # Include 'bookmark_selector' within the tab
823
- # It is defined globally and will be displayed only in this tab via CSS
824
- bookmark_selector
825
-
826
  with gr.Row():
827
  delete_button = gr.Button("🗑️ Delete Selected")
828
  edit_category_button = gr.Button("✏️ Edit Category")
@@ -831,7 +829,13 @@ Navigate through the tabs to explore each feature in detail.
831
 
832
  download_link = gr.File(label="📥 Download Exported Bookmarks")
833
 
834
- # Define button actions
 
 
 
 
 
 
835
  delete_button.click(
836
  delete_selected_bookmarks,
837
  inputs=[bookmark_selector, state_bookmarks],
@@ -852,7 +856,8 @@ Navigate through the tabs to explore each feature in detail.
852
  refresh_button.click(
853
  lambda state_bookmarks: (
854
  [
855
- f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})" for i, bookmark in enumerate(state_bookmarks)
 
856
  ],
857
  display_bookmarks()
858
  ),
@@ -862,12 +867,8 @@ Navigate through the tabs to explore each feature in detail.
862
 
863
  logger.info("Launching Gradio app")
864
  demo.launch(debug=True)
865
- except gr.Error as e:
866
- logger.error(f"Gradio Error: {e}", exc_info=True)
867
- print(f"Gradio Error: {e}")
868
  except Exception as e:
869
  logger.error(f"Error building the app: {e}", exc_info=True)
870
  print(f"Error building the app: {e}")
871
 
872
- if __name__ == "__main__":
873
- build_app()
 
8
  import requests
9
  import time
10
  import re
11
+ import base64
12
  import logging
13
  import os
14
  import sys
15
+ import concurrent.futures
16
  from concurrent.futures import ThreadPoolExecutor
17
  import threading
 
18
 
19
+ # Import OpenAI library
20
+ import openai
21
+
22
+ # Suppress only the single warning from urllib3 needed.
23
  import urllib3
24
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
25
 
 
47
 
48
  # Lock for thread-safe operations
49
  lock = threading.Lock()
 
 
 
 
50
 
51
  # Define the categories
52
  CATEGORIES = [
 
74
  "Uncategorized",
75
  ]
76
 
77
+ # Set up Groq Cloud API key and base URL
78
+ GROQ_API_KEY = os.getenv('GROQ_API_KEY')
 
 
79
 
80
+ if not GROQ_API_KEY:
81
+ logger.error("GROQ_API_KEY environment variable not set.")
82
+
83
+ openai.api_key = GROQ_API_KEY
84
+ openai.api_base = "https://api.groq.com/openai/v1"
85
+
86
+ # Initialize global variables for rate limiting
87
+ api_lock = threading.Lock()
88
+ last_api_call_time = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  def extract_main_content(soup):
91
  """
 
107
  content = soup.get_text(separator=' ', strip=True)
108
 
109
  # Clean up the text
110
+ content = re.sub(r'\s+', ' ', content)
111
 
112
  # Truncate content to a reasonable length (e.g., 1500 words)
113
  words = content.split()
 
158
 
159
  def generate_summary_and_assign_category(bookmark):
160
  """
161
+ Generate a concise summary and assign a category using a single LLM call.
162
  """
163
  logger.info(f"Generating summary and assigning category for bookmark: {bookmark.get('url')}")
164
 
165
+ max_retries = 3
166
+ retry_count = 0
167
+
168
+ while retry_count < max_retries:
169
+ try:
170
+ # Rate Limiting Logic
171
+ with api_lock:
172
+ global last_api_call_time
173
+ current_time = time.time()
174
+ elapsed = current_time - last_api_call_time
175
+ if elapsed < 2:
176
+ sleep_duration = 2 - elapsed
177
+ logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
178
+ time.sleep(sleep_duration)
179
+ last_api_call_time = time.time()
180
+
181
+ html_content = bookmark.get('html_content', '')
182
+ soup = BeautifulSoup(html_content, 'html.parser')
183
+ metadata = get_page_metadata(soup)
184
+ main_content = extract_main_content(soup)
185
+
186
+ # Prepare content for the prompt
187
+ content_parts = []
188
+ if metadata['title']:
189
+ content_parts.append(f"Title: {metadata['title']}")
190
+ if metadata['description']:
191
+ content_parts.append(f"Description: {metadata['description']}")
192
+ if metadata['keywords']:
193
+ content_parts.append(f"Keywords: {metadata['keywords']}")
194
+ if main_content:
195
+ content_parts.append(f"Main Content: {main_content}")
196
+
197
+ content_text = '\n'.join(content_parts)
198
+
199
+ # Detect insufficient or erroneous content
200
+ error_keywords = ['Access Denied', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
201
+ if not content_text or len(content_text.split()) < 50:
202
+ use_prior_knowledge = True
203
+ logger.info(f"Content for {bookmark.get('url')} is insufficient. Instructing LLM to use prior knowledge.")
204
+ elif any(keyword.lower() in content_text.lower() for keyword in error_keywords):
205
+ use_prior_knowledge = True
206
+ logger.info(f"Content for {bookmark.get('url')} contains error messages. Instructing LLM to use prior knowledge.")
207
+ else:
208
+ use_prior_knowledge = False
209
 
210
+ if use_prior_knowledge:
211
+ prompt = f"""
 
212
  You are a knowledgeable assistant with up-to-date information as of 2023.
213
  URL: {bookmark.get('url')}
214
  Provide:
 
220
  Summary: [Your summary]
221
  Category: [One category]
222
  """
223
+ else:
224
+ prompt = f"""
225
  You are an assistant that creates concise webpage summaries and assigns categories.
226
  Content:
227
  {content_text}
 
235
  Category: [One category]
236
  """
237
 
238
+ def estimate_tokens(text):
239
+ return len(text) / 4
240
+
241
+ prompt_tokens = estimate_tokens(prompt)
242
+ max_tokens = 150
243
+ total_tokens = prompt_tokens + max_tokens
244
+
245
+ tokens_per_minute = 40000
246
+ tokens_per_second = tokens_per_minute / 60
247
+ required_delay = total_tokens / tokens_per_second
248
+ sleep_time = max(required_delay, 2)
249
+
250
+ response = openai.ChatCompletion.create(
251
+ model='llama-3.1-70b-versatile',
252
+ messages=[
253
+ {"role": "user", "content": prompt}
254
+ ],
255
+ max_tokens=int(max_tokens),
256
+ temperature=0.5,
257
+ )
258
 
259
+ content = response['choices'][0]['message']['content'].strip()
260
+ if not content:
261
+ raise ValueError("Empty response received from the model.")
262
 
263
+ summary_match = re.search(r"Summary:\s*(.*)", content)
264
+ category_match = re.search(r"Category:\s*(.*)", content)
 
265
 
266
+ if summary_match:
267
+ bookmark['summary'] = summary_match.group(1).strip()
268
+ else:
269
+ bookmark['summary'] = 'No summary available.'
270
 
271
+ if category_match:
272
+ category = category_match.group(1).strip().strip('"')
273
+ if category in CATEGORIES:
274
+ bookmark['category'] = category
275
+ else:
276
+ bookmark['category'] = 'Uncategorized'
277
  else:
278
  bookmark['category'] = 'Uncategorized'
 
 
 
 
 
 
 
 
 
 
279
 
280
+ # Simple keyword-based validation
281
+ summary_lower = bookmark['summary'].lower()
282
+ url_lower = bookmark['url'].lower()
283
+ if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
284
+ bookmark['category'] = 'Social Media'
285
+ elif 'wikipedia' in url_lower:
286
+ bookmark['category'] = 'Reference and Knowledge Bases'
287
+
288
+ logger.info("Successfully generated summary and assigned category")
289
+ time.sleep(sleep_time)
290
+ break
291
+
292
+ except openai.error.RateLimitError as e:
293
+ retry_count += 1
294
+ wait_time = int(e.headers.get("Retry-After", 5))
295
+ logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
296
+ time.sleep(wait_time)
297
+ except Exception as e:
298
+ logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
299
+ bookmark['summary'] = 'No summary available.'
300
+ bookmark['category'] = 'Uncategorized'
301
+ break
302
 
303
  def parse_bookmarks(file_content):
304
  """
 
345
  content = response.text
346
  logger.info(f"Fetched content length for {url}: {len(content)} characters")
347
 
 
348
  if response.status_code >= 500:
 
349
  bookmark['dead_link'] = True
350
  bookmark['description'] = ''
351
  bookmark['html_content'] = ''
 
357
  logger.info(f"Fetched information for {url}")
358
 
359
  except requests.exceptions.Timeout:
360
+ bookmark['dead_link'] = False
361
  bookmark['etag'] = 'N/A'
362
  bookmark['status_code'] = 'Timeout'
363
  bookmark['description'] = ''
364
  bookmark['html_content'] = ''
365
+ bookmark['slow_link'] = True
366
  logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
367
  except Exception as e:
368
  bookmark['dead_link'] = True
 
393
  embeddings = embedding_model.encode(summaries)
394
  dimension = embeddings.shape[1]
395
  index = faiss.IndexIDMap(faiss.IndexFlatL2(dimension))
 
396
  ids = np.array([bookmark['id'] for bookmark in bookmarks_list], dtype=np.int64)
397
  index.add_with_ids(np.array(embeddings).astype('float32'), ids)
398
  faiss_index = index
 
413
  if bookmark.get('dead_link'):
414
  status = "❌ Dead Link"
415
  card_style = "border: 2px solid red;"
416
+ text_style = "color: white;"
417
  elif bookmark.get('slow_link'):
418
+ status = "⏳ Slow Response"
419
  card_style = "border: 2px solid orange;"
420
+ text_style = "color: white;"
421
  else:
422
  status = "✅ Active"
423
  card_style = "border: 2px solid green;"
424
+ text_style = "color: white;"
425
 
426
  title = bookmark['title']
427
  url = bookmark['url']
 
430
  category = bookmark.get('category', 'Uncategorized')
431
 
432
  # Escape HTML content to prevent XSS attacks
433
+ from html import escape
434
  title = escape(title)
435
  url = escape(url)
436
  summary = escape(summary)
 
460
 
461
  if file is None:
462
  logger.warning("No file uploaded")
463
+ return "Please upload a bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
464
 
465
  try:
466
  file_content = file.decode('utf-8')
467
  except UnicodeDecodeError as e:
468
  logger.error(f"Error decoding the file: {e}", exc_info=True)
469
+ return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
470
 
471
  try:
472
  bookmarks = parse_bookmarks(file_content)
473
  except Exception as e:
474
  logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
475
+ return "Error parsing the bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
476
 
477
  if not bookmarks:
478
  logger.warning("No bookmarks found in the uploaded file")
479
+ return "No bookmarks found in the uploaded file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
480
 
481
  # Assign unique IDs to bookmarks
482
  for idx, bookmark in enumerate(bookmarks):
 
484
 
485
  # Fetch bookmark info concurrently
486
  logger.info("Fetching URL info concurrently")
487
+ with ThreadPoolExecutor(max_workers=10) as executor:
488
  executor.map(fetch_url_info, bookmarks)
489
 
490
  # Process bookmarks concurrently with LLM calls
491
  logger.info("Processing bookmarks with LLM concurrently")
492
+ with ThreadPoolExecutor(max_workers=1) as executor:
493
  executor.map(generate_summary_and_assign_category, bookmarks)
494
 
495
  try:
496
  faiss_index = vectorize_and_index(bookmarks)
497
  except Exception as e:
498
  logger.error(f"Error building FAISS index: {e}", exc_info=True)
499
+ return "Error building search index.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
500
 
501
  message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
502
  logger.info(message)
 
509
  # Update state
510
  state_bookmarks = bookmarks.copy()
511
 
512
+ return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
513
 
514
  def delete_selected_bookmarks(selected_indices, state_bookmarks):
515
  """
 
522
  ids_to_delete = []
523
  indices_to_delete = []
524
  for s in selected_indices:
525
+ idx = int(s.split('.')[0]) - 1
526
+ if 0 <= idx < len(bookmarks):
527
+ bookmark_id = bookmarks[idx]['id']
528
+ ids_to_delete.append(bookmark_id)
529
+ indices_to_delete.append(idx)
530
+ logger.info(f"Deleting bookmark at index {idx + 1}")
 
 
 
531
 
532
  # Remove vectors from FAISS index
533
  if faiss_index is not None and ids_to_delete:
 
556
  if not new_category:
557
  return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
558
 
559
+ indices = [int(s.split('.')[0])-1 for s in selected_indices]
 
 
 
 
 
 
 
 
 
 
560
  for idx in indices:
561
+ if 0 <= idx < len(bookmarks):
562
+ bookmarks[idx]['category'] = new_category
563
+ logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
564
 
565
  message = "✏️ Category updated for selected bookmarks."
566
  logger.info(message)
 
580
  """
581
  if not bookmarks:
582
  logger.warning("No bookmarks to export")
583
+ return None
584
 
585
  try:
586
  logger.info("Exporting bookmarks to HTML")
 
594
  dl.append(dt)
595
  soup.append(dl)
596
  html_content = str(soup)
 
597
  output_file = "exported_bookmarks.html"
598
  with open(output_file, 'w', encoding='utf-8') as f:
599
  f.write(html_content)
600
  logger.info("Bookmarks exported successfully")
601
+ return output_file
602
  except Exception as e:
603
  logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
604
+ return None
605
 
606
  def chatbot_response(user_query, chat_history):
607
  """
608
+ Generate chatbot response using the FAISS index and embeddings.
609
  """
610
  if not bookmarks or faiss_index is None:
611
  logger.warning("No bookmarks available for chatbot")
 
615
  logger.info(f"Chatbot received query: {user_query}")
616
 
617
  try:
 
618
  chat_history.append({"role": "user", "content": user_query})
619
 
 
620
  with api_lock:
621
  global last_api_call_time
622
  current_time = time.time()
 
625
  sleep_duration = 2 - elapsed
626
  logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
627
  time.sleep(sleep_duration)
628
+ last_api_call_time = time.time()
629
 
 
630
  query_vector = embedding_model.encode([user_query]).astype('float32')
631
+ k = 5
632
  distances, ids = faiss_index.search(query_vector, k)
633
  ids = ids.flatten()
634
 
 
635
  id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
636
  matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
637
 
 
640
  chat_history.append({"role": "assistant", "content": answer})
641
  return chat_history
642
 
 
643
  bookmarks_info = "\n".join([
644
  f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
645
  for bookmark in matching_bookmarks
646
  ])
647
 
 
648
  prompt = f"""
649
  A user asked: "{user_query}"
650
  Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 
653
  Provide a concise and helpful response.
654
  """
655
 
656
+ def estimate_tokens(text):
657
+ return len(text) / 4
658
+
659
+ prompt_tokens = estimate_tokens(prompt)
660
+ max_tokens = 300
661
+ total_tokens = prompt_tokens + max_tokens
662
 
663
+ tokens_per_minute = 40000
664
+ tokens_per_second = tokens_per_minute / 60
665
+ required_delay = total_tokens / tokens_per_second
666
+ sleep_time = max(required_delay, 2)
667
 
668
+ response = openai.ChatCompletion.create(
669
+ model='llama-3.1-70b-versatile',
670
+ messages=[
671
+ {"role": "user", "content": prompt}
672
+ ],
673
+ max_tokens=int(max_tokens),
674
+ temperature=0.7,
675
+ )
676
+
677
+ answer = response['choices'][0]['message']['content'].strip()
678
  logger.info("Chatbot response generated")
679
+ time.sleep(sleep_time)
680
 
 
681
  chat_history.append({"role": "assistant", "content": answer})
682
  return chat_history
683
 
684
+ except openai.error.RateLimitError as e:
685
+ wait_time = int(e.headers.get("Retry-After", 5))
686
+ logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
687
+ time.sleep(wait_time)
688
+ return chatbot_response(user_query, chat_history)
689
  except Exception as e:
690
  error_message = f"⚠️ Error processing your query: {str(e)}"
691
  logger.error(error_message, exc_info=True)
 
702
  # Initialize state
703
  state_bookmarks = gr.State([])
704
 
 
 
 
 
 
 
705
  # General Overview
706
  gr.Markdown("""
707
  # 📚 SmartMarks - AI Browser Bookmarks Manager
 
721
  Navigate through the tabs to explore each feature in detail.
722
  """)
723
 
724
+ # Upload and Process Bookmarks Tab
725
  with gr.Tab("Upload and Process Bookmarks"):
726
  gr.Markdown("""
727
  ## 📂 **Upload and Process Bookmarks**
 
739
  3. **View Processed Bookmarks:**
740
  - Once processing is complete, your bookmarks will be displayed in an organized and visually appealing format below.
741
  """)
742
+
743
  upload = gr.File(label="πŸ“ Upload Bookmarks HTML File", type='binary')
744
  process_button = gr.Button("βš™οΈ Process Bookmarks")
745
  output_text = gr.Textbox(label="βœ… Output", interactive=False)
746
  bookmark_display = gr.HTML(label="πŸ“„ Processed Bookmarks")
747
 
748
+ # Chat with Bookmarks Tab
 
 
 
 
 
749
  with gr.Tab("Chat with Bookmarks"):
750
  gr.Markdown("""
751
  ## 💬 **Chat with Bookmarks**
 
764
  4. **View Chat History:**
765
  - All your queries and the corresponding AI responses are displayed in the chat history for your reference.
766
  """)
767
+
768
  chatbot = gr.Chatbot(label="πŸ’¬ Chat with SmartMarks", type='messages')
769
  user_input = gr.Textbox(
770
  label="✍️ Ask about your bookmarks",
 
778
  outputs=chatbot
779
  )
780
 
781
+ # Manage Bookmarks Tab
782
  with gr.Tab("Manage Bookmarks"):
783
  gr.Markdown("""
784
+ ## 🛠️ **Manage Bookmarks**
 
785
  ### 🗂️ **Features:**
786
 
787
  1. **View Bookmarks:**
 
805
  6. **Refresh Bookmarks:**
806
  - Click the **"🔄 Refresh Bookmarks"** button to ensure the latest state is reflected in the display.
807
  """)
808
+
809
  manage_output = gr.Textbox(label="πŸ”„ Status", interactive=False)
810
+
811
+ # Move bookmark_selector definition here
812
+ bookmark_selector = gr.CheckboxGroup(
813
+ label="βœ… Select Bookmarks",
814
+ choices=[]
815
+ )
816
+
817
  new_category = gr.Dropdown(
818
  label="πŸ†• New Category",
819
  choices=CATEGORIES,
 
821
  )
822
  bookmark_display_manage = gr.HTML(label="πŸ“„ Bookmarks")
823
 
 
 
 
 
 
824
  with gr.Row():
825
  delete_button = gr.Button("πŸ—‘οΈ Delete Selected")
826
  edit_category_button = gr.Button("✏️ Edit Category")
 
829
 
830
  download_link = gr.File(label="πŸ“₯ Download Exported Bookmarks")
831
 
832
+ # Update process_button to use the bookmark_selector in Manage tab
833
+ process_button.click(
834
+ process_uploaded_file,
835
+ inputs=[upload, state_bookmarks],
836
+ outputs=[output_text, bookmark_display, state_bookmarks, bookmark_display, bookmark_selector]
837
+ )
838
+
839
  delete_button.click(
840
  delete_selected_bookmarks,
841
  inputs=[bookmark_selector, state_bookmarks],
 
856
  refresh_button.click(
857
  lambda state_bookmarks: (
858
  [
859
+ f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
860
+ for i, bookmark in enumerate(state_bookmarks)
861
  ],
862
  display_bookmarks()
863
  ),
 
867
 
868
  logger.info("Launching Gradio app")
869
  demo.launch(debug=True)
 
 
 
870
  except Exception as e:
871
  logger.error(f"Error building the app: {e}", exc_info=True)
872
  print(f"Error building the app: {e}")
873
 
874
+ if __name__ == "__main__":