Spaces:

siddhartharya
/

Bookmark-Manager

Running

App Files Community

siddhartharya committed on Nov 26, 2024

Commit

5b290a0

verified ·

1 Parent(s): f63ecfa

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -68

app.py CHANGED Viewed

@@ -163,17 +163,18 @@ def generate_summary_and_assign_category(bookmark):
     max_retries = 3
     retry_count = 0
     while retry_count < max_retries:
         try:
-            # Rate Limiting Logic
             with api_lock:
                 global last_api_call_time
                 current_time = time.time()
                 elapsed = current_time - last_api_call_time
-                if elapsed < 2:
-                    sleep_duration = 2 - elapsed
-                    logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
                     time.sleep(sleep_duration)
                 last_api_call_time = time.time()
@@ -244,7 +245,7 @@ Category: [One category]
             tokens_per_minute = 40000
             tokens_per_second = tokens_per_minute / 60
             required_delay = total_tokens / tokens_per_second
-            sleep_time = max(required_delay, 2)
             response = openai.ChatCompletion.create(
                 model='llama-3.1-70b-versatile',
@@ -285,14 +286,21 @@ Category: [One category]
                 bookmark['category'] = 'Reference and Knowledge Bases'
             logger.info("Successfully generated summary and assigned category")
             time.sleep(sleep_time)
             break
         except openai.error.RateLimitError as e:
             retry_count += 1
-            wait_time = int(e.headers.get("Retry-After", 5))
             logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
             time.sleep(wait_time)
         except Exception as e:
             logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
             bookmark['summary'] = 'No summary available.'
@@ -320,7 +328,6 @@ def parse_bookmarks(file_content):
     except Exception as e:
         logger.error("Error parsing bookmarks: %s", e, exc_info=True)
         raise
 def fetch_url_info(bookmark):
     """
     Fetch information about a URL.
@@ -509,7 +516,6 @@ def process_uploaded_file(file, state_bookmarks):
     state_bookmarks = bookmarks.copy()
     return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
 def delete_selected_bookmarks(selected_indices, state_bookmarks):
     """
     Delete selected bookmarks and remove their vectors from the FAISS index.
@@ -616,35 +622,41 @@ def chatbot_response(user_query, chat_history):
     try:
         chat_history.append({"role": "user", "content": user_query})
-        with api_lock:
-            global last_api_call_time
-            current_time = time.time()
-            elapsed = current_time - last_api_call_time
-            if elapsed < 2:
-                sleep_duration = 2 - elapsed
-                logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
-                time.sleep(sleep_duration)
-            last_api_call_time = time.time()
-        query_vector = embedding_model.encode([user_query]).astype('float32')
-        k = 5
-        distances, ids = faiss_index.search(query_vector, k)
-        ids = ids.flatten()
-        id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
-        matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
-        if not matching_bookmarks:
-            answer = "No relevant bookmarks found for your query."
-            chat_history.append({"role": "assistant", "content": answer})
-            return chat_history
-        bookmarks_info = "\n".join([
-            f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
-            for bookmark in matching_bookmarks
-        ])
-        prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
@@ -652,39 +664,34 @@ Bookmarks:
 Provide a concise and helpful response.
 """
-        def estimate_tokens(text):
-            return len(text) / 4
-        prompt_tokens = estimate_tokens(prompt)
-        max_tokens = 300
-        total_tokens = prompt_tokens + max_tokens
-        tokens_per_minute = 40000
-        tokens_per_second = tokens_per_minute / 60
-        required_delay = total_tokens / tokens_per_second
-        sleep_time = max(required_delay, 2)
-        response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=int(max_tokens),
-            temperature=0.7,
-        )
-        answer = response['choices'][0]['message']['content'].strip()
-        logger.info("Chatbot response generated")
-        time.sleep(sleep_time)
-        chat_history.append({"role": "assistant", "content": answer})
-        return chat_history
-    except openai.error.RateLimitError as e:
-        wait_time = int(e.headers.get("Retry-After", 5))
-        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
-        time.sleep(wait_time)
-        return chatbot_response(user_query, chat_history)
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)

     max_retries = 3
     retry_count = 0
+    base_wait = 5  # Increased base wait time to 5 seconds
     while retry_count < max_retries:
         try:
+            # Rate Limiting Logic - Modified
             with api_lock:
                 global last_api_call_time
                 current_time = time.time()
                 elapsed = current_time - last_api_call_time
+                if elapsed < base_wait:
+                    sleep_duration = base_wait - elapsed
+                    logger.info(f"Rate limiting: Waiting for {sleep_duration:.2f} seconds...")
                     time.sleep(sleep_duration)
                 last_api_call_time = time.time()
             tokens_per_minute = 40000
             tokens_per_second = tokens_per_minute / 60
             required_delay = total_tokens / tokens_per_second
+            sleep_time = max(required_delay, base_wait)  # Use at least base_wait seconds
             response = openai.ChatCompletion.create(
                 model='llama-3.1-70b-versatile',
                 bookmark['category'] = 'Reference and Knowledge Bases'
             logger.info("Successfully generated summary and assigned category")
+            # Add consistent delay after successful processing
             time.sleep(sleep_time)
             break
         except openai.error.RateLimitError as e:
             retry_count += 1
+            # Use exponential backoff with a maximum wait time
+            wait_time = min(base_wait * (2 ** retry_count), 30)  # Cap at 30 seconds
             logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
             time.sleep(wait_time)
+            if retry_count == max_retries:
+                bookmark['summary'] = 'Summary generation failed due to rate limits.'
+                bookmark['category'] = 'Uncategorized'
+                break
         except Exception as e:
             logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
             bookmark['summary'] = 'No summary available.'
     except Exception as e:
         logger.error("Error parsing bookmarks: %s", e, exc_info=True)
         raise
 def fetch_url_info(bookmark):
     """
     Fetch information about a URL.
     state_bookmarks = bookmarks.copy()
     return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
 def delete_selected_bookmarks(selected_indices, state_bookmarks):
     """
     Delete selected bookmarks and remove their vectors from the FAISS index.
     try:
         chat_history.append({"role": "user", "content": user_query})
+        # Implement better rate limiting
+        max_retries = 5
+        base_wait = 5  # Increased base wait time to 5 seconds
+        for attempt in range(max_retries):
+            try:
+                with api_lock:
+                    global last_api_call_time
+                    current_time = time.time()
+                    elapsed = current_time - last_api_call_time
+                    if elapsed < base_wait:
+                        sleep_duration = base_wait - elapsed
+                        logger.info(f"Rate limiting: Waiting for {sleep_duration:.2f} seconds...")
+                        time.sleep(sleep_duration)
+                    last_api_call_time = time.time()
+                # Search for relevant bookmarks
+                query_vector = embedding_model.encode([user_query]).astype('float32')
+                k = 5
+                distances, ids = faiss_index.search(query_vector, k)
+                ids = ids.flatten()
+                id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
+                matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
+                if not matching_bookmarks:
+                    answer = "No relevant bookmarks found for your query."
+                    chat_history.append({"role": "assistant", "content": answer})
+                    return chat_history
+                bookmarks_info = "\n".join([
+                    f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
+                    for bookmark in matching_bookmarks
+                ])
+                prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
 Provide a concise and helpful response.
 """
+                response = openai.ChatCompletion.create(
+                    model='llama-3.1-70b-versatile',
+                    messages=[
+                        {"role": "user", "content": prompt}
+                    ],
+                    max_tokens=300,
+                    temperature=0.7,
+                )
+                answer = response['choices'][0]['message']['content'].strip()
+                logger.info("Chatbot response generated")
+                # Add a small delay between successful requests
+                time.sleep(base_wait)
+                chat_history.append({"role": "assistant", "content": answer})
+                return chat_history
+            except openai.error.RateLimitError as e:
+                wait_time = min(base_wait * (2 ** attempt), 30)  # Cap maximum wait time at 30 seconds
+                logger.warning(f"Rate limit reached. Attempt {attempt + 1}/{max_retries}. Waiting for {wait_time} seconds...")
+                time.sleep(wait_time)
+                if attempt == max_retries - 1:
+                    error_message = "⚠️ The service is currently experiencing high demand. Please try again in a few moments."
+                    chat_history.append({"role": "assistant", "content": error_message})
+                    return chat_history
+                continue
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)