siddhartharya committed on
Commit
eb485e7
·
verified ·
1 Parent(s): 31c955d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -75
app.py CHANGED
@@ -265,21 +265,23 @@ def llm_worker():
265
  tpm_bucket.wait_for_token(tokens=total_tokens)
266
 
267
  # Prepare prompt
268
- prompt = "You are an assistant that creates concise webpage summaries and assigns categories.\n\n"
269
- prompt += "Provide summaries and categories for the following bookmarks:\n\n"
 
270
 
 
271
  for idx, bookmark in enumerate(batch, 1):
272
- prompt += f"Bookmark {idx}:\nURL: {bookmark['url']}\nTitle: {bookmark['title']}\n\n"
273
 
274
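  # NOTE: the f-string below still has backslash escapes inside its {...} expression (a SyntaxError before Python 3.12); build the quoted category list in a separate variable first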
275
- prompt += f"Categories:\n{', '.join([f'\"{cat}\"' for cat in CATEGORIES])}\n\n"
276
 
277
  prompt += "Format your response as a JSON object where each key is the bookmark URL and the value is another JSON object containing 'summary' and 'category'.\n\n"
278
  prompt += "Example:\n"
279
  prompt += "{\n"
280
- prompt += " \"https://example.com\": {\n"
281
- prompt += " \"summary\": \"This is an example summary.\",\n"
282
- prompt += " \"category\": \"Technology\"\n"
283
  prompt += " }\n"
284
  prompt += "}\n\n"
285
  prompt += "Now, provide the summaries and categories for the bookmarks listed above."
@@ -338,7 +340,7 @@ def llm_worker():
338
  bookmark['summary'] = 'No summary available.'
339
  bookmark['category'] = 'Uncategorized'
340
 
341
- except openai.error.RateLimitError as e:
342
  logger.warning(f"LLM Rate limit reached. Retrying after 60 seconds.")
343
  # Re-enqueue the entire batch for retry
344
  for bookmark in batch:
@@ -357,41 +359,27 @@ def llm_worker():
357
  for _ in batch:
358
  llm_queue.task_done()
359
 
360
def categorize_based_on_summary(summary, url):
    """
    Assign a category based on keywords in the summary or the URL.

    Rules are evaluated in order and the first match wins:

    1. 'social media' or 'twitter' in the summary, or the URL host is
       x.com (or a subdomain of it)      -> 'Social Media'
    2. 'wikipedia' anywhere in the URL   -> 'Reference and Knowledge Bases'
    3. 'cloud computing' in the summary, or 'aws' as a whole word
                                         -> 'Technology'
    4. 'news' or 'media' in the summary  -> 'News and Media'
    5. 'education' or 'learning' in the summary
                                         -> 'Education and Learning'
    6. otherwise                         -> 'Uncategorized'

    All comparisons are case-insensitive.

    Args:
        summary: Free-text summary of the page; None is treated as ''.
        url: Bookmark URL; None is treated as ''.

    Returns:
        The category name as a string.
    """
    import re
    from urllib.parse import urlparse

    summary_lower = (summary or '').lower()
    url_lower = (url or '').lower()

    # Compare 'x.com' against the parsed host rather than the raw URL text:
    # a plain substring test also fires for hosts such as netflix.com or
    # dropbox.com, which merely end in "x.com".
    try:
        # urlparse only fills in the host when a '//' authority marker exists.
        target = url_lower if '://' in url_lower else '//' + url_lower
        host = urlparse(target).hostname or ''
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): no host rule.
        host = ''
    on_x = host == 'x.com' or host.endswith('.x.com')

    if 'social media' in summary_lower or 'twitter' in summary_lower or on_x:
        return 'Social Media'
    if 'wikipedia' in url_lower:
        return 'Reference and Knowledge Bases'
    # 'aws' must be a whole word so that "laws", "draws", "flaws" do not match.
    if 'cloud computing' in summary_lower or re.search(r'\baws\b', summary_lower):
        return 'Technology'
    if 'news' in summary_lower or 'media' in summary_lower:
        return 'News and Media'
    if 'education' in summary_lower or 'learning' in summary_lower:
        return 'Education and Learning'
    # Add more conditions as needed
    return 'Uncategorized'
379
-
380
def validate_category(bookmark):
    """
    Further validate and adjust the category if needed.

    Well-known sites get a fixed category based on the URL's host; any other
    bookmark keeps the category it already has.

    Args:
        bookmark: Mapping with a 'url' string and a 'category' string.

    Returns:
        'Social Media' for facebook / x.com hosts, 'Reference and Knowledge
        Bases' for wikipedia hosts, 'Technology' for aws.amazon.com, and
        bookmark['category'] otherwise.

    Raises:
        KeyError: if 'url' is missing, or if 'category' is missing and no
            host rule matched.
    """
    from urllib.parse import urlparse

    url_lower = bookmark['url'].lower()

    # Match against the host only. Testing the whole URL text misclassifies
    # e.g. netflix.com (ends in "x.com") or a share link whose query string
    # merely mentions "facebook".
    try:
        # urlparse only fills in the host when a '//' authority marker exists.
        target = url_lower if '://' in url_lower else '//' + url_lower
        host = urlparse(target).hostname or ''
    except ValueError:
        # Malformed authority: fall through to the existing category.
        host = ''

    def host_is(domain):
        # True for the domain itself and for any of its subdomains.
        return host == domain or host.endswith('.' + domain)

    # Example: Specific cases based on URL
    if 'facebook' in host or host_is('x.com'):
        return 'Social Media'
    if 'wikipedia' in host:
        return 'Reference and Knowledge Bases'
    if host_is('aws.amazon.com'):
        return 'Technology'
    # Add more specific cases as needed
    return bookmark['category']
 
 
 
 
 
 
395
 
396
  def fetch_url_info(bookmark):
397
  """
@@ -453,28 +441,6 @@ def fetch_url_info(bookmark):
453
  'slow_link': bookmark.get('slow_link', False),
454
  }
455
 
456
def parse_bookmarks(file_content):
    """
    Parse bookmarks from HTML file.

    Every <a> element that has both a non-empty href and non-empty link text
    is considered; only http:// and https:// links are kept.

    Args:
        file_content: HTML markup handed to BeautifulSoup's 'html.parser'.

    Returns:
        A list of {'url': ..., 'title': ...} dicts in document order.

    Raises:
        Exception: any error raised while parsing is logged with its
            traceback and re-raised unchanged.
    """
    logger.info("Parsing bookmarks")
    try:
        soup = BeautifulSoup(file_content, 'html.parser')
        extracted_bookmarks = []
        for link in soup.find_all('a'):
            # Tolerate a missing href and stray whitespace around the value.
            url = (link.get('href') or '').strip()
            title = link.text.strip()
            if not (url and title):
                continue
            # URL schemes are case-insensitive, so 'HTTP://...' is still a
            # web link; compare on a lowercased copy but store the original.
            if url.lower().startswith(('http://', 'https://')):
                extracted_bookmarks.append({'url': url, 'title': title})
            else:
                logger.info("Skipping non-http/https URL: %s", url)
        logger.info("Extracted %d bookmarks", len(extracted_bookmarks))
        return extracted_bookmarks
    except Exception as e:
        # Top-level boundary for parsing: record the traceback, then let the
        # caller decide how to surface the failure.
        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
        raise
477
-
478
  def vectorize_and_index(bookmarks_list):
479
  """
480
  Create vector embeddings for bookmarks and build FAISS index with ID mapping.
@@ -546,14 +512,6 @@ def display_bookmarks():
546
  logger.info("HTML display generated")
547
  return cards
548
 
549
def generate_summary_and_assign_category(bookmark):
    """
    Deprecated no-op kept so existing callers do not break.

    Summary generation and category assignment are now handled by the LLM
    worker thread. This function ignores ``bookmark``, emits a
    DeprecationWarning so lingering call sites become visible, and returns
    None.

    Args:
        bookmark: Unused.

    Returns:
        None.
    """
    import warnings

    # stacklevel=2 attributes the warning to the caller, not to this stub.
    warnings.warn(
        "generate_summary_and_assign_category() is deprecated and does nothing; "
        "summaries and categories are produced by the LLM worker thread.",
        DeprecationWarning,
        stacklevel=2,
    )
556
-
557
  def process_uploaded_file(file, state_bookmarks):
558
  """
559
  Process the uploaded bookmarks file.
@@ -749,13 +707,13 @@ def chatbot_response(user_query, chat_history):
749
  for bookmark in matching_bookmarks
750
  ])
751
 
752
- prompt = f"""
753
  A user asked: "{user_query}"
754
  Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
755
  Bookmarks:
756
  {bookmarks_info}
757
  Provide a concise and helpful response.
758
- """
759
 
760
  response = openai.ChatCompletion.create(
761
  model='llama-3.1-70b-versatile', # Ensure this model is correct and available
@@ -772,8 +730,8 @@ Provide a concise and helpful response.
772
  chat_history.append({"role": "assistant", "content": answer})
773
  return chat_history
774
 
775
- except openai.error.RateLimitError as e:
776
- wait_time = int(e.headers.get("Retry-After", 5))
777
  logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
778
  time.sleep(wait_time)
779
  return chatbot_response(user_query, chat_history)
 
265
  tpm_bucket.wait_for_token(tokens=total_tokens)
266
 
267
  # Prepare prompt
268
+ prompt = f'''
269
+ You are an assistant that creates concise webpage summaries and assigns categories.
270
+ Provide summaries and categories for the following bookmarks:
271
 
272
+ '''
273
  for idx, bookmark in enumerate(batch, 1):
274
+ prompt += f'Bookmark {idx}:\nURL: {bookmark["url"]}\nTitle: {bookmark["title"]}\n\n'
275
 
276
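  # NOTE: the f-string below still has backslash escapes inside its {...} expression (a SyntaxError before Python 3.12); build the quoted category list in a separate variable first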
277
+ prompt += f'Categories:\n{", ".join([f\'"{cat}"\' for cat in CATEGORIES])}\n\n'
278
 
279
  prompt += "Format your response as a JSON object where each key is the bookmark URL and the value is another JSON object containing 'summary' and 'category'.\n\n"
280
  prompt += "Example:\n"
281
  prompt += "{\n"
282
+ prompt += ' "https://example.com": {\n'
283
+ prompt += ' "summary": "This is an example summary.",\n'
284
+ prompt += ' "category": "Technology"\n'
285
  prompt += " }\n"
286
  prompt += "}\n\n"
287
  prompt += "Now, provide the summaries and categories for the bookmarks listed above."
 
340
  bookmark['summary'] = 'No summary available.'
341
  bookmark['category'] = 'Uncategorized'
342
 
343
+ except openai.error.RateLimitError:
344
  logger.warning(f"LLM Rate limit reached. Retrying after 60 seconds.")
345
  # Re-enqueue the entire batch for retry
346
  for bookmark in batch:
 
359
  for _ in batch:
360
  llm_queue.task_done()
361
 
362
def parse_bookmarks(file_content):
    """
    Parse bookmarks from HTML file.

    Every <a> element that has both a non-empty href and non-empty link text
    is considered; only http:// and https:// links are kept.

    Args:
        file_content: HTML markup handed to BeautifulSoup's 'html.parser'.

    Returns:
        A list of {'url': ..., 'title': ...} dicts in document order.

    Raises:
        Exception: any error raised while parsing is logged with its
            traceback and re-raised unchanged.
    """
    logger.info("Parsing bookmarks")
    try:
        soup = BeautifulSoup(file_content, 'html.parser')
        extracted_bookmarks = []
        for link in soup.find_all('a'):
            # Tolerate a missing href and stray whitespace around the value.
            url = (link.get('href') or '').strip()
            title = link.text.strip()
            if not (url and title):
                continue
            # URL schemes are case-insensitive, so 'HTTP://...' is still a
            # web link; compare on a lowercased copy but store the original.
            if url.lower().startswith(('http://', 'https://')):
                extracted_bookmarks.append({'url': url, 'title': title})
            else:
                logger.info("Skipping non-http/https URL: %s", url)
        logger.info("Extracted %d bookmarks", len(extracted_bookmarks))
        return extracted_bookmarks
    except Exception as e:
        # Top-level boundary for parsing: record the traceback, then let the
        # caller decide how to surface the failure.
        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
        raise
383
 
384
  def fetch_url_info(bookmark):
385
  """
 
441
  'slow_link': bookmark.get('slow_link', False),
442
  }
443
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  def vectorize_and_index(bookmarks_list):
445
  """
446
  Create vector embeddings for bookmarks and build FAISS index with ID mapping.
 
512
  logger.info("HTML display generated")
513
  return cards
514
 
 
 
 
 
 
 
 
 
515
  def process_uploaded_file(file, state_bookmarks):
516
  """
517
  Process the uploaded bookmarks file.
 
707
  for bookmark in matching_bookmarks
708
  ])
709
 
710
+ prompt = f'''
711
  A user asked: "{user_query}"
712
  Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
713
  Bookmarks:
714
  {bookmarks_info}
715
  Provide a concise and helpful response.
716
+ '''
717
 
718
  response = openai.ChatCompletion.create(
719
  model='llama-3.1-70b-versatile', # Ensure this model is correct and available
 
730
  chat_history.append({"role": "assistant", "content": answer})
731
  return chat_history
732
 
733
+ except openai.error.RateLimitError:
734
+ wait_time = int(60) # Wait time can be adjusted or extracted from headers if available
735
  logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
736
  time.sleep(wait_time)
737
  return chatbot_response(user_query, chat_history)