Update app.py
app.py
CHANGED
@@ -265,21 +265,23 @@ def llm_worker():
         tpm_bucket.wait_for_token(tokens=total_tokens)

         # Prepare prompt
-        prompt =
-
+        prompt = f'''
+You are an assistant that creates concise webpage summaries and assigns categories.
+Provide summaries and categories for the following bookmarks:

+'''
         for idx, bookmark in enumerate(batch, 1):
-            prompt += f
+            prompt += f'Bookmark {idx}:\nURL: {bookmark["url"]}\nTitle: {bookmark["title"]}\n\n'

         # Corrected f-string without backslashes
-        prompt += f
+        prompt += f'Categories:\n{", ".join([f\'"{cat}"\' for cat in CATEGORIES])}\n\n'

         prompt += "Format your response as a JSON object where each key is the bookmark URL and the value is another JSON object containing 'summary' and 'category'.\n\n"
         prompt += "Example:\n"
         prompt += "{\n"
-        prompt +=
-        prompt +=
-        prompt +=
+        prompt += ' "https://example.com": {\n'
+        prompt += ' "summary": "This is an example summary.",\n'
+        prompt += ' "category": "Technology"\n'
         prompt += " }\n"
         prompt += "}\n\n"
         prompt += "Now, provide the summaries and categories for the bookmarks listed above."
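The "# Corrected f-string without backslashes" comment points at a Python rule: before Python 3.12, backslash escapes are not allowed inside the expression braces of an f-string, and as extracted the added Categories line still appears to carry escaped quotes inside the braces. A minimal sketch of an equivalent construction that keeps every backslash outside the braces (CATEGORIES here is a stand-in for the list defined elsewhere in app.py):

    # Stand-in for the CATEGORIES list defined elsewhere in app.py.
    CATEGORIES = ["Technology", "News and Media", "Education and Learning"]

    # Quote and join the category names first, then interpolate the result,
    # so no backslash is needed inside the f-string expression braces.
    category_list = ", ".join(f'"{cat}"' for cat in CATEGORIES)
    categories_line = f"Categories:\n{category_list}\n\n"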
@@ -338,7 +340,7 @@ def llm_worker():
                 bookmark['summary'] = 'No summary available.'
                 bookmark['category'] = 'Uncategorized'

-        except openai.error.RateLimitError
+        except openai.error.RateLimitError:
            logger.warning(f"LLM Rate limit reached. Retrying after 60 seconds.")
            # Re-enqueue the entire batch for retry
            for bookmark in batch:
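For context on the re-enqueue path above, a rough sketch of the pattern, assuming the pre-1.0 openai client that app.py uses and a queue holding one item per bookmark (the helper name and arguments are illustrative, not from the commit):

    import time
    import openai  # pre-1.0 client, which exposes openai.error.RateLimitError

    def summarize_batch_with_retry(create_kwargs, llm_queue, batch, wait_seconds=60):
        """Attempt one chat completion; on a rate limit, re-enqueue the whole batch."""
        try:
            return openai.ChatCompletion.create(**create_kwargs)
        except openai.error.RateLimitError:
            time.sleep(wait_seconds)      # back off, mirroring the 60-second log message
            for bookmark in batch:
                llm_queue.put(bookmark)   # hand every bookmark back for a later retry
            return None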
@@ -357,41 +359,27 @@ def llm_worker():
         for _ in batch:
             llm_queue.task_done()

-def
-    """
-    Assign category based on keywords in the summary or URL.
-    """
-    summary_lower = summary.lower()
-    url_lower = url.lower()
-    if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
-        return 'Social Media'
-    elif 'wikipedia' in url_lower:
-        return 'Reference and Knowledge Bases'
-    elif 'cloud computing' in summary_lower or 'aws' in summary_lower:
-        return 'Technology'
-    elif 'news' in summary_lower or 'media' in summary_lower:
-        return 'News and Media'
-    elif 'education' in summary_lower or 'learning' in summary_lower:
-        return 'Education and Learning'
-    # Add more conditions as needed
-    else:
-        return 'Uncategorized'
-
-def validate_category(bookmark):
+def parse_bookmarks(file_content):
     """
-
+    Parse bookmarks from HTML file.
     """
-
-
-
-
-
-
-
-
-
-
-
+    logger.info("Parsing bookmarks")
+    try:
+        soup = BeautifulSoup(file_content, 'html.parser')
+        extracted_bookmarks = []
+        for link in soup.find_all('a'):
+            url = link.get('href')
+            title = link.text.strip()
+            if url and title:
+                if url.startswith('http://') or url.startswith('https://'):
+                    extracted_bookmarks.append({'url': url, 'title': title})
+                else:
+                    logger.info(f"Skipping non-http/https URL: {url}")
+        logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
+        return extracted_bookmarks
+    except Exception as e:
+        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
+        raise

 def fetch_url_info(bookmark):
     """
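A small usage sketch for the relocated parse_bookmarks, fed a fragment of the Netscape-style bookmark HTML that browsers export (the sample markup is illustrative, and the call assumes app.py's module-level logger and BeautifulSoup import are in place):

    sample = """
    <DL><p>
        <DT><A HREF="https://example.com/">Example Domain</A>
        <DT><A HREF="ftp://old.example.org/">Old FTP Mirror</A>
    </DL><p>
    """

    bookmarks = parse_bookmarks(sample)
    # Expected: [{'url': 'https://example.com/', 'title': 'Example Domain'}]
    # The ftp:// entry is skipped because only http/https URLs are kept.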
@@ -453,28 +441,6 @@ def fetch_url_info(bookmark):
         'slow_link': bookmark.get('slow_link', False),
     }

-def parse_bookmarks(file_content):
-    """
-    Parse bookmarks from HTML file.
-    """
-    logger.info("Parsing bookmarks")
-    try:
-        soup = BeautifulSoup(file_content, 'html.parser')
-        extracted_bookmarks = []
-        for link in soup.find_all('a'):
-            url = link.get('href')
-            title = link.text.strip()
-            if url and title:
-                if url.startswith('http://') or url.startswith('https://'):
-                    extracted_bookmarks.append({'url': url, 'title': title})
-                else:
-                    logger.info(f"Skipping non-http/https URL: {url}")
-        logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
-        return extracted_bookmarks
-    except Exception as e:
-        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
-        raise
-
 def vectorize_and_index(bookmarks_list):
     """
     Create vector embeddings for bookmarks and build FAISS index with ID mapping.
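The vectorize_and_index context above mentions a FAISS index with ID mapping; a minimal sketch of that pattern with faiss and sentence-transformers, offered only as an illustration (the model name and the title+URL embedding text are assumptions, not taken from app.py):

    import faiss
    import numpy as np
    from sentence_transformers import SentenceTransformer

    embedder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model

    def build_index(bookmarks_list):
        texts = [f"{b['title']} {b['url']}" for b in bookmarks_list]
        embeddings = embedder.encode(texts).astype("float32")
        ids = np.arange(len(bookmarks_list), dtype="int64")
        index = faiss.IndexIDMap(faiss.IndexFlatL2(embeddings.shape[1]))
        index.add_with_ids(embeddings, ids)  # each vector stored under its bookmark id
        return index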
@@ -546,14 +512,6 @@ def display_bookmarks():
     logger.info("HTML display generated")
     return cards

-def generate_summary_and_assign_category(bookmark):
-    """
-    Generate a concise summary and assign a category using a single LLM call.
-    This function is now handled by the LLM worker thread.
-    """
-    # This function is now deprecated and handled by the worker thread.
-    pass
-
 def process_uploaded_file(file, state_bookmarks):
     """
     Process the uploaded bookmarks file.
@@ -749,13 +707,13 @@ def chatbot_response(user_query, chat_history):
             for bookmark in matching_bookmarks
         ])

-        prompt = f
+        prompt = f'''
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
 {bookmarks_info}
 Provide a concise and helpful response.
-
+'''

         response = openai.ChatCompletion.create(
             model='llama-3.1-70b-versatile',  # Ensure this model is correct and available
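The ChatCompletion.create call above uses the pre-1.0 openai client; a hedged sketch of how the prompt built here is typically sent and read back (the message role and sampling parameters are illustrative, not copied from the commit):

    import openai  # pre-1.0 client, matching openai.ChatCompletion in app.py

    response = openai.ChatCompletion.create(
        model='llama-3.1-70b-versatile',  # assumed to be served via an OpenAI-compatible endpoint
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=512,
    )
    answer = response["choices"][0]["message"]["content"]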
@@ -772,8 +730,8 @@ Provide a concise and helpful response.
         chat_history.append({"role": "assistant", "content": answer})
         return chat_history

-    except openai.error.RateLimitError
-        wait_time = int(
+    except openai.error.RateLimitError:
+        wait_time = int(60)  # Wait time can be adjusted or extracted from headers if available
         logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
         time.sleep(wait_time)
         return chatbot_response(user_query, chat_history)
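The handler above retries by calling chatbot_response recursively after a fixed wait; one loop-based alternative with the same control flow, sketched only as a design note (the helper name and attempt limit are assumptions, not part of the commit):

    import time
    import openai

    def create_with_retry(create_kwargs, max_attempts=3, wait_time=60):
        """Loop instead of recursing when the completion call is rate limited."""
        for _ in range(max_attempts):
            try:
                return openai.ChatCompletion.create(**create_kwargs)
            except openai.error.RateLimitError:
                time.sleep(wait_time)  # fixed wait, mirroring wait_time = int(60) above
        raise RuntimeError("Rate limited on every attempt")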