Update app.py
app.py CHANGED
@@ -86,6 +86,7 @@ openai.api_base = "https://api.groq.com/openai/v1"
 # Initialize global variables for rate limiting
 api_lock = threading.Lock()
 last_api_call_time = 0
+
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
@@ -154,7 +155,6 @@ def get_page_metadata(soup):
         metadata['title'] = og_title.get('content', '').strip()
 
     return metadata
-
 def generate_summary_and_assign_category(bookmark):
     """
     Generate a concise summary and assign a category using a single LLM call.
@@ -298,6 +298,7 @@ Category: [One category]
             bookmark['summary'] = 'No summary available.'
             bookmark['category'] = 'Uncategorized'
             break
+
 def parse_bookmarks(file_content):
     """
     Parse bookmarks from HTML file.
@@ -448,6 +449,7 @@ def display_bookmarks():
         cards += card_html
     logger.info("HTML display generated")
     return cards
+
 def process_uploaded_file(file, state_bookmarks):
     """
     Process the uploaded bookmarks file.
@@ -614,41 +616,35 @@ def chatbot_response(user_query, chat_history):
     try:
         chat_history.append({"role": "user", "content": user_query})
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        bookmarks_info = "\n".join([
-            f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
-            for bookmark in matching_bookmarks
-        ])
-
-        prompt = f"""
+        with api_lock:
+            global last_api_call_time
+            current_time = time.time()
+            elapsed = current_time - last_api_call_time
+            if elapsed < 2:
+                sleep_duration = 2 - elapsed
+                logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
+                time.sleep(sleep_duration)
+            last_api_call_time = time.time()
+
+        query_vector = embedding_model.encode([user_query]).astype('float32')
+        k = 5
+        distances, ids = faiss_index.search(query_vector, k)
+        ids = ids.flatten()
+
+        id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
+        matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
+
+        if not matching_bookmarks:
+            answer = "No relevant bookmarks found for your query."
+            chat_history.append({"role": "assistant", "content": answer})
+            return chat_history
+
+        bookmarks_info = "\n".join([
+            f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
+            for bookmark in matching_bookmarks
+        ])
+
+        prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
@@ -656,34 +652,39 @@ Bookmarks:
 Provide a concise and helpful response.
 """
 
-
-
-        messages=[
-            {"role": "user", "content": prompt}
-        ],
-        max_tokens=300,
-        temperature=0.7,
-        )
+        def estimate_tokens(text):
+            return len(text) / 4
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        prompt_tokens = estimate_tokens(prompt)
+        max_tokens = 300
+        total_tokens = prompt_tokens + max_tokens
+
+        tokens_per_minute = 40000
+        tokens_per_second = tokens_per_minute / 60
+        required_delay = total_tokens / tokens_per_second
+        sleep_time = max(required_delay, 2)
+
+        response = openai.ChatCompletion.create(
+            model='llama-3.1-70b-versatile',
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=int(max_tokens),
+            temperature=0.7,
+        )
+
+        answer = response['choices'][0]['message']['content'].strip()
+        logger.info("Chatbot response generated")
+        time.sleep(sleep_time)
+
+        chat_history.append({"role": "assistant", "content": answer})
+        return chat_history
 
+    except openai.error.RateLimitError as e:
+        wait_time = int(e.headers.get("Retry-After", 5))
+        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
+        time.sleep(wait_time)
+        return chatbot_response(user_query, chat_history)
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)
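The rewritten `chatbot_response` combines three techniques: a lock-guarded minimum-interval rate limiter, FAISS-based bookmark retrieval, and a token-budget throttle. A minimal standalone sketch of the rate-limiter pattern, assuming the same 2-second floor as the diff (`MIN_INTERVAL` and `wait_for_rate_limit` are illustrative names, not part of app.py):

```python
import threading
import time

api_lock = threading.Lock()
last_api_call_time = 0.0
MIN_INTERVAL = 2.0  # seconds between calls; mirrors the hard-coded 2 in the diff

def wait_for_rate_limit():
    """Block until at least MIN_INTERVAL has elapsed since the last API call."""
    global last_api_call_time
    with api_lock:
        elapsed = time.time() - last_api_call_time
        if elapsed < MIN_INTERVAL:
            time.sleep(MIN_INTERVAL - elapsed)
        # Update the timestamp while still holding the lock so concurrent
        # threads serialize their calls.
        last_api_call_time = time.time()
```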
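The token throttle assumes roughly four characters per token and spaces calls to fit the 40,000 tokens-per-minute budget: a 2,000-character prompt estimates to 500 tokens, plus the 300-token completion cap, gives 800 tokens and a required delay of 800 / (40000 / 60) = 1.2 s, so the 2-second floor in `max(required_delay, 2)` governs. Note that the `RateLimitError` handler retries by calling `chatbot_response` recursively with no depth limit; a bounded loop is one alternative, sketched here against the same legacy `openai` 0.x API the diff uses (`call_with_retry` and `max_retries` are illustrative names):

```python
import logging
import time

import openai

logger = logging.getLogger(__name__)

def call_with_retry(make_request, max_retries=3):
    """Bounded alternative to the diff's unbounded recursive retry."""
    for attempt in range(max_retries):
        try:
            return make_request()
        except openai.error.RateLimitError as e:
            # Honor the server's Retry-After header, defaulting to 5 s as in the diff.
            wait_time = int(e.headers.get("Retry-After", 5))
            logger.warning(f"Rate limit hit (attempt {attempt + 1}/{max_retries}); "
                           f"waiting {wait_time}s before retrying...")
            time.sleep(wait_time)
    raise RuntimeError("rate limit retries exhausted")
```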