Update app.py
app.py CHANGED
@@ -86,6 +86,7 @@ openai.api_base = "https://api.groq.com/openai/v1"
 # Initialize global variables for rate limiting
 api_lock = threading.Lock()
 last_api_call_time = 0
+
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
@@ -154,7 +155,6 @@ def get_page_metadata(soup):
         metadata['title'] = og_title.get('content', '').strip()
 
     return metadata
-
 def generate_summary_and_assign_category(bookmark):
     """
     Generate a concise summary and assign a category using a single LLM call.
@@ -298,6 +298,7 @@ Category: [One category]
             bookmark['summary'] = 'No summary available.'
             bookmark['category'] = 'Uncategorized'
             break
+
 def parse_bookmarks(file_content):
     """
     Parse bookmarks from HTML file.
@@ -448,6 +449,7 @@ def display_bookmarks():
         cards += card_html
     logger.info("HTML display generated")
     return cards
+
 def process_uploaded_file(file, state_bookmarks):
     """
     Process the uploaded bookmarks file.
@@ -614,41 +616,35 @@ def chatbot_response(user_query, chat_history):
     try:
         chat_history.append({"role": "user", "content": user_query})
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        bookmarks_info = "\n".join([
-            f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
-            for bookmark in matching_bookmarks
-        ])
-
-        prompt = f"""
+        with api_lock:
+            global last_api_call_time
+            current_time = time.time()
+            elapsed = current_time - last_api_call_time
+            if elapsed < 2:
+                sleep_duration = 2 - elapsed
+                logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
+                time.sleep(sleep_duration)
+            last_api_call_time = time.time()
+
+        query_vector = embedding_model.encode([user_query]).astype('float32')
+        k = 5
+        distances, ids = faiss_index.search(query_vector, k)
+        ids = ids.flatten()
+
+        id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
+        matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
+
+        if not matching_bookmarks:
+            answer = "No relevant bookmarks found for your query."
+            chat_history.append({"role": "assistant", "content": answer})
+            return chat_history
+
+        bookmarks_info = "\n".join([
+            f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
+            for bookmark in matching_bookmarks
+        ])
+
+        prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
@@ -656,34 +652,39 @@ Bookmarks:
 Provide a concise and helpful response.
 """
 
-
-
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=300,
-            temperature=0.7,
-        )
+        def estimate_tokens(text):
+            return len(text) / 4
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        prompt_tokens = estimate_tokens(prompt)
+        max_tokens = 300
+        total_tokens = prompt_tokens + max_tokens
+
+        tokens_per_minute = 40000
+        tokens_per_second = tokens_per_minute / 60
+        required_delay = total_tokens / tokens_per_second
+        sleep_time = max(required_delay, 2)
+
+        response = openai.ChatCompletion.create(
+            model='llama-3.1-70b-versatile',
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=int(max_tokens),
+            temperature=0.7,
+        )
+
+        answer = response['choices'][0]['message']['content'].strip()
+        logger.info("Chatbot response generated")
+        time.sleep(sleep_time)
+
+        chat_history.append({"role": "assistant", "content": answer})
+        return chat_history
 
+    except openai.error.RateLimitError as e:
+        wait_time = int(e.headers.get("Retry-After", 5))
+        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
+        time.sleep(wait_time)
+        return chatbot_response(user_query, chat_history)
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)
|