Update app.py
app.py CHANGED
@@ -15,7 +15,6 @@ import sys
 import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
-from ratelimiter import RateLimiter  # Optional
 
 # Import OpenAI library
 import openai
@@ -83,8 +82,8 @@ if not GROQ_API_KEY:
 openai.api_key = GROQ_API_KEY
 openai.api_base = "https://api.groq.com/openai/v1"  # Ensure this is the correct base URL
 
-# Initialize rate
-
+# Initialize semaphore for rate limiting (allowing 1 concurrent API call)
+api_semaphore = threading.Semaphore(1)
 
 # Global variables for models to enable lazy loading
 embedding_model = None
@@ -237,8 +236,10 @@ Summary: [Your summary]
 Category: [One category]
 """
 
-    #
-
+    # Acquire semaphore before making API call
+    api_semaphore.acquire()
+    try:
+        # Call the LLM via Groq Cloud API with exponential backoff
         response = openai.ChatCompletion.create(
             model='llama-3.1-70b-versatile',  # Ensure this is the correct model name
             messages=[
@@ -247,6 +248,10 @@ Category: [One category]
             max_tokens=150,
             temperature=0.5,
         )
+    finally:
+        # Release semaphore after API call
+        api_semaphore.release()
+
     content = response['choices'][0]['message']['content'].strip()
     if not content:
         raise ValueError("Empty response received from the model.")
@@ -706,25 +711,46 @@ Bookmarks:
 Provide a concise and helpful response.
 """
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Acquire semaphore before making API call
+        api_semaphore.acquire()
+        try:
+            # Call the LLM via Groq Cloud API with exponential backoff
+            max_retries = 5
+            retry_count = 0
+            base_wait = 1  # Initial wait time in seconds
+
+            while retry_count < max_retries:
+                try:
+                    response = openai.ChatCompletion.create(
+                        model='llama-3.1-70b-versatile',  # Ensure this is the correct model name
+                        messages=[
+                            {"role": "user", "content": prompt}
+                        ],
+                        max_tokens=300,
+                        temperature=0.7,
+                    )
+                    answer = response['choices'][0]['message']['content'].strip()
+                    logger.info("Chatbot response generated")
+                    return chat_history + [{"role": "user", "content": user_query}, {"role": "assistant", "content": answer}]
+                except openai.error.RateLimitError as e:
+                    retry_count += 1
+                    wait_time = base_wait * (2 ** retry_count)  # Exponential backoff
+                    logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
+                    time.sleep(wait_time)
+                except Exception as e:
+                    error_message = f"⚠️ Error processing your query: {str(e)}"
+                    logger.error(error_message, exc_info=True)
+                    return chat_history + [{"role": "assistant", "content": error_message}]
+
+            # If max retries reached
+            error_message = "⚠️ Unable to process your query at the moment. Please try again later."
+            logger.error(error_message)
+            return chat_history + [{"role": "assistant", "content": error_message}]
+
+        finally:
+            # Release semaphore after API call
+            api_semaphore.release()
+
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)
@@ -914,8 +940,8 @@ Navigate through the tabs to explore each feature in detail.
         logger.info("Launching Gradio app")
         demo.launch(debug=True)
     except Exception as e:
-        logger.error(f"Error building
-        print(f"Error building
+        logger.error(f"Error building Gradio app: {e}", exc_info=True)
+        print(f"Error building Gradio app: {e}")
 
 if __name__ == "__main__":
     build_app()
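
The commit serializes Groq API calls with a module-level threading.Semaphore(1), acquired before each openai.ChatCompletion.create call and released in a finally block. Below is a minimal sketch of the same pattern that uses the semaphore as a context manager, so the release happens even if the call raises; the function name summarize is illustrative and not taken from app.py, while the model name and parameters mirror the diff.

import threading

import openai

# Module-level semaphore, as in the commit: at most one in-flight API call.
api_semaphore = threading.Semaphore(1)

def summarize(prompt: str) -> str:
    # threading.Semaphore supports the context-manager protocol, so this is
    # equivalent to acquire() followed by try ... finally: release().
    with api_semaphore:
        response = openai.ChatCompletion.create(
            model='llama-3.1-70b-versatile',
            messages=[{"role": "user", "content": prompt}],
            max_tokens=150,
            temperature=0.5,
        )
    content = response['choices'][0]['message']['content'].strip()
    if not content:
        raise ValueError("Empty response received from the model.")
    return content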
|
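
The chatbot handler additionally retries on openai.error.RateLimitError, waiting base_wait * 2 ** retry_count seconds between attempts for up to five tries. The same backoff could be factored into a reusable helper; the following is a sketch under the same assumptions (openai 0.x-style API), and the helper name create_with_backoff is hypothetical, not part of app.py.

import logging
import time

import openai

logger = logging.getLogger(__name__)

def create_with_backoff(max_retries: int = 5, base_wait: float = 1.0, **create_kwargs):
    """Call openai.ChatCompletion.create, retrying rate-limit errors with exponential backoff."""
    for retry_count in range(1, max_retries + 1):
        try:
            return openai.ChatCompletion.create(**create_kwargs)
        except openai.error.RateLimitError:
            wait_time = base_wait * (2 ** retry_count)  # 2s, 4s, 8s, ... as in the commit
            logger.warning(
                "Rate limit reached. Waiting for %s seconds before retrying... (Attempt %s/%s)",
                wait_time, retry_count, max_retries,
            )
            time.sleep(wait_time)
    raise RuntimeError("Rate limit retries exhausted; please try again later.")

A caller would pass the same arguments the commit uses, e.g. create_with_backoff(model='llama-3.1-70b-versatile', messages=[{"role": "user", "content": prompt}], max_tokens=300, temperature=0.7).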