Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -163,17 +163,18 @@ def generate_summary_and_assign_category(bookmark):
|
|
163 |
|
164 |
max_retries = 3
|
165 |
retry_count = 0
|
|
|
166 |
|
167 |
while retry_count < max_retries:
|
168 |
try:
|
169 |
-
# Rate Limiting Logic
|
170 |
with api_lock:
|
171 |
global last_api_call_time
|
172 |
current_time = time.time()
|
173 |
elapsed = current_time - last_api_call_time
|
174 |
-
if elapsed <
|
175 |
-
sleep_duration =
|
176 |
-
logger.info(f"
|
177 |
time.sleep(sleep_duration)
|
178 |
last_api_call_time = time.time()
|
179 |
|
@@ -244,7 +245,7 @@ Category: [One category]
|
|
244 |
tokens_per_minute = 40000
|
245 |
tokens_per_second = tokens_per_minute / 60
|
246 |
required_delay = total_tokens / tokens_per_second
|
247 |
-
sleep_time = max(required_delay,
|
248 |
|
249 |
response = openai.ChatCompletion.create(
|
250 |
model='llama-3.1-70b-versatile',
|
@@ -285,14 +286,21 @@ Category: [One category]
|
|
285 |
bookmark['category'] = 'Reference and Knowledge Bases'
|
286 |
|
287 |
logger.info("Successfully generated summary and assigned category")
|
|
|
|
|
288 |
time.sleep(sleep_time)
|
289 |
break
|
290 |
|
291 |
except openai.error.RateLimitError as e:
|
292 |
retry_count += 1
|
293 |
-
|
|
|
294 |
logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
|
295 |
time.sleep(wait_time)
|
|
|
|
|
|
|
|
|
296 |
except Exception as e:
|
297 |
logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
|
298 |
bookmark['summary'] = 'No summary available.'
|
@@ -320,7 +328,6 @@ def parse_bookmarks(file_content):
|
|
320 |
except Exception as e:
|
321 |
logger.error("Error parsing bookmarks: %s", e, exc_info=True)
|
322 |
raise
|
323 |
-
|
324 |
def fetch_url_info(bookmark):
|
325 |
"""
|
326 |
Fetch information about a URL.
|
@@ -509,7 +516,6 @@ def process_uploaded_file(file, state_bookmarks):
|
|
509 |
state_bookmarks = bookmarks.copy()
|
510 |
|
511 |
return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
|
512 |
-
|
513 |
def delete_selected_bookmarks(selected_indices, state_bookmarks):
|
514 |
"""
|
515 |
Delete selected bookmarks and remove their vectors from the FAISS index.
|
@@ -616,35 +622,41 @@ def chatbot_response(user_query, chat_history):
|
|
616 |
try:
|
617 |
chat_history.append({"role": "user", "content": user_query})
|
618 |
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
648 |
A user asked: "{user_query}"
|
649 |
Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
|
650 |
Bookmarks:
|
@@ -652,39 +664,34 @@ Bookmarks:
|
|
652 |
Provide a concise and helpful response.
|
653 |
"""
|
654 |
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
tokens_per_second = tokens_per_minute / 60
|
664 |
-
required_delay = total_tokens / tokens_per_second
|
665 |
-
sleep_time = max(required_delay, 2)
|
666 |
-
|
667 |
-
response = openai.ChatCompletion.create(
|
668 |
-
model='llama-3.1-70b-versatile',
|
669 |
-
messages=[
|
670 |
-
{"role": "user", "content": prompt}
|
671 |
-
],
|
672 |
-
max_tokens=int(max_tokens),
|
673 |
-
temperature=0.7,
|
674 |
-
)
|
675 |
-
|
676 |
-
answer = response['choices'][0]['message']['content'].strip()
|
677 |
-
logger.info("Chatbot response generated")
|
678 |
-
time.sleep(sleep_time)
|
679 |
|
680 |
-
|
681 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
682 |
|
683 |
-
except openai.error.RateLimitError as e:
|
684 |
-
wait_time = int(e.headers.get("Retry-After", 5))
|
685 |
-
logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
|
686 |
-
time.sleep(wait_time)
|
687 |
-
return chatbot_response(user_query, chat_history)
|
688 |
except Exception as e:
|
689 |
error_message = f"⚠️ Error processing your query: {str(e)}"
|
690 |
logger.error(error_message, exc_info=True)
|
|
|
163 |
|
164 |
max_retries = 3
|
165 |
retry_count = 0
|
166 |
+
base_wait = 5 # Increased base wait time to 5 seconds
|
167 |
|
168 |
while retry_count < max_retries:
|
169 |
try:
|
170 |
+
# Rate Limiting Logic - Modified
|
171 |
with api_lock:
|
172 |
global last_api_call_time
|
173 |
current_time = time.time()
|
174 |
elapsed = current_time - last_api_call_time
|
175 |
+
if elapsed < base_wait:
|
176 |
+
sleep_duration = base_wait - elapsed
|
177 |
+
logger.info(f"Rate limiting: Waiting for {sleep_duration:.2f} seconds...")
|
178 |
time.sleep(sleep_duration)
|
179 |
last_api_call_time = time.time()
|
180 |
|
|
|
245 |
tokens_per_minute = 40000
|
246 |
tokens_per_second = tokens_per_minute / 60
|
247 |
required_delay = total_tokens / tokens_per_second
|
248 |
+
sleep_time = max(required_delay, base_wait) # Use at least base_wait seconds
|
249 |
|
250 |
response = openai.ChatCompletion.create(
|
251 |
model='llama-3.1-70b-versatile',
|
|
|
286 |
bookmark['category'] = 'Reference and Knowledge Bases'
|
287 |
|
288 |
logger.info("Successfully generated summary and assigned category")
|
289 |
+
|
290 |
+
# Add consistent delay after successful processing
|
291 |
time.sleep(sleep_time)
|
292 |
break
|
293 |
|
294 |
except openai.error.RateLimitError as e:
|
295 |
retry_count += 1
|
296 |
+
# Use exponential backoff with a maximum wait time
|
297 |
+
wait_time = min(base_wait * (2 ** retry_count), 30) # Cap at 30 seconds
|
298 |
logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
|
299 |
time.sleep(wait_time)
|
300 |
+
if retry_count == max_retries:
|
301 |
+
bookmark['summary'] = 'Summary generation failed due to rate limits.'
|
302 |
+
bookmark['category'] = 'Uncategorized'
|
303 |
+
break
|
304 |
except Exception as e:
|
305 |
logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
|
306 |
bookmark['summary'] = 'No summary available.'
|
|
|
328 |
except Exception as e:
|
329 |
logger.error("Error parsing bookmarks: %s", e, exc_info=True)
|
330 |
raise
|
|
|
331 |
def fetch_url_info(bookmark):
|
332 |
"""
|
333 |
Fetch information about a URL.
|
|
|
516 |
state_bookmarks = bookmarks.copy()
|
517 |
|
518 |
return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
|
|
|
519 |
def delete_selected_bookmarks(selected_indices, state_bookmarks):
|
520 |
"""
|
521 |
Delete selected bookmarks and remove their vectors from the FAISS index.
|
|
|
622 |
try:
|
623 |
chat_history.append({"role": "user", "content": user_query})
|
624 |
|
625 |
+
# Implement better rate limiting
|
626 |
+
max_retries = 5
|
627 |
+
base_wait = 5 # Increased base wait time to 5 seconds
|
628 |
+
for attempt in range(max_retries):
|
629 |
+
try:
|
630 |
+
with api_lock:
|
631 |
+
global last_api_call_time
|
632 |
+
current_time = time.time()
|
633 |
+
elapsed = current_time - last_api_call_time
|
634 |
+
if elapsed < base_wait:
|
635 |
+
sleep_duration = base_wait - elapsed
|
636 |
+
logger.info(f"Rate limiting: Waiting for {sleep_duration:.2f} seconds...")
|
637 |
+
time.sleep(sleep_duration)
|
638 |
+
last_api_call_time = time.time()
|
639 |
+
|
640 |
+
# Search for relevant bookmarks
|
641 |
+
query_vector = embedding_model.encode([user_query]).astype('float32')
|
642 |
+
k = 5
|
643 |
+
distances, ids = faiss_index.search(query_vector, k)
|
644 |
+
ids = ids.flatten()
|
645 |
+
|
646 |
+
id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
|
647 |
+
matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
|
648 |
+
|
649 |
+
if not matching_bookmarks:
|
650 |
+
answer = "No relevant bookmarks found for your query."
|
651 |
+
chat_history.append({"role": "assistant", "content": answer})
|
652 |
+
return chat_history
|
653 |
+
|
654 |
+
bookmarks_info = "\n".join([
|
655 |
+
f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
|
656 |
+
for bookmark in matching_bookmarks
|
657 |
+
])
|
658 |
+
|
659 |
+
prompt = f"""
|
660 |
A user asked: "{user_query}"
|
661 |
Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
|
662 |
Bookmarks:
|
|
|
664 |
Provide a concise and helpful response.
|
665 |
"""
|
666 |
|
667 |
+
response = openai.ChatCompletion.create(
|
668 |
+
model='llama-3.1-70b-versatile',
|
669 |
+
messages=[
|
670 |
+
{"role": "user", "content": prompt}
|
671 |
+
],
|
672 |
+
max_tokens=300,
|
673 |
+
temperature=0.7,
|
674 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
675 |
|
676 |
+
answer = response['choices'][0]['message']['content'].strip()
|
677 |
+
logger.info("Chatbot response generated")
|
678 |
+
|
679 |
+
# Add a small delay between successful requests
|
680 |
+
time.sleep(base_wait)
|
681 |
+
|
682 |
+
chat_history.append({"role": "assistant", "content": answer})
|
683 |
+
return chat_history
|
684 |
+
|
685 |
+
except openai.error.RateLimitError as e:
|
686 |
+
wait_time = min(base_wait * (2 ** attempt), 30) # Cap maximum wait time at 30 seconds
|
687 |
+
logger.warning(f"Rate limit reached. Attempt {attempt + 1}/{max_retries}. Waiting for {wait_time} seconds...")
|
688 |
+
time.sleep(wait_time)
|
689 |
+
if attempt == max_retries - 1:
|
690 |
+
error_message = "⚠️ The service is currently experiencing high demand. Please try again in a few moments."
|
691 |
+
chat_history.append({"role": "assistant", "content": error_message})
|
692 |
+
return chat_history
|
693 |
+
continue
|
694 |
|
|
|
|
|
|
|
|
|
|
|
695 |
except Exception as e:
|
696 |
error_message = f"⚠️ Error processing your query: {str(e)}"
|
697 |
logger.error(error_message, exc_info=True)
|