Update app.py
app.py CHANGED
@@ -15,7 +15,6 @@ import sys
 import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
-from ratelimiter import RateLimiter  # Optional
 
 # Import OpenAI library
 import openai
@@ -83,8 +82,8 @@ if not GROQ_API_KEY:
 openai.api_key = GROQ_API_KEY
 openai.api_base = "https://api.groq.com/openai/v1"  # Ensure this is the correct base URL
 
-# Initialize rate
-
+# Initialize semaphore for rate limiting (allowing 1 concurrent API call)
+api_semaphore = threading.Semaphore(1)
 
 # Global variables for models to enable lazy loading
 embedding_model = None
@@ -237,8 +236,10 @@ Summary: [Your summary]
 Category: [One category]
 """
 
-    #
-
+    # Acquire semaphore before making API call
+    api_semaphore.acquire()
+    try:
+        # Call the LLM via Groq Cloud API with exponential backoff
         response = openai.ChatCompletion.create(
             model='llama-3.1-70b-versatile',  # Ensure this is the correct model name
             messages=[
@@ -247,6 +248,10 @@ Category: [One category]
             max_tokens=150,
             temperature=0.5,
         )
+    finally:
+        # Release semaphore after API call
+        api_semaphore.release()
+
     content = response['choices'][0]['message']['content'].strip()
     if not content:
         raise ValueError("Empty response received from the model.")
@@ -706,25 +711,46 @@ Bookmarks:
 Provide a concise and helpful response.
 """
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Acquire semaphore before making API call
+        api_semaphore.acquire()
+        try:
+            # Call the LLM via Groq Cloud API with exponential backoff
+            max_retries = 5
+            retry_count = 0
+            base_wait = 1  # Initial wait time in seconds
+
+            while retry_count < max_retries:
+                try:
+                    response = openai.ChatCompletion.create(
+                        model='llama-3.1-70b-versatile',  # Ensure this is the correct model name
+                        messages=[
+                            {"role": "user", "content": prompt}
+                        ],
+                        max_tokens=300,
+                        temperature=0.7,
+                    )
+                    answer = response['choices'][0]['message']['content'].strip()
+                    logger.info("Chatbot response generated")
+                    return chat_history + [{"role": "user", "content": user_query}, {"role": "assistant", "content": answer}]
+                except openai.error.RateLimitError as e:
+                    retry_count += 1
+                    wait_time = base_wait * (2 ** retry_count)  # Exponential backoff
+                    logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
+                    time.sleep(wait_time)
+                except Exception as e:
+                    error_message = f"⚠️ Error processing your query: {str(e)}"
+                    logger.error(error_message, exc_info=True)
+                    return chat_history + [{"role": "assistant", "content": error_message}]
+
+            # If max retries reached
+            error_message = "⚠️ Unable to process your query at the moment. Please try again later."
+            logger.error(error_message)
+            return chat_history + [{"role": "assistant", "content": error_message}]
+
+        finally:
+            # Release semaphore after API call
+            api_semaphore.release()
+
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)
@@ -914,8 +940,8 @@ Navigate through the tabs to explore each feature in detail.
         logger.info("Launching Gradio app")
         demo.launch(debug=True)
     except Exception as e:
-        logger.error(f"Error building
-        print(f"Error building
+        logger.error(f"Error building Gradio app: {e}", exc_info=True)
+        print(f"Error building Gradio app: {e}")
 
 if __name__ == "__main__":
     build_app()
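
The commit serializes Groq API calls with a module-level threading.Semaphore(1), acquired before each openai.ChatCompletion.create call and released in a finally block. Below is a minimal sketch of the same pattern that uses the semaphore as a context manager, so the release happens even if the call raises; the function name summarize is illustrative and not taken from app.py, while the model name and parameters mirror the diff.

import threading

import openai

# Module-level semaphore, as in the commit: at most one in-flight API call.
api_semaphore = threading.Semaphore(1)

def summarize(prompt: str) -> str:
    # threading.Semaphore supports the context-manager protocol, so this is
    # equivalent to acquire() followed by try ... finally: release().
    with api_semaphore:
        response = openai.ChatCompletion.create(
            model='llama-3.1-70b-versatile',
            messages=[{"role": "user", "content": prompt}],
            max_tokens=150,
            temperature=0.5,
        )
    content = response['choices'][0]['message']['content'].strip()
    if not content:
        raise ValueError("Empty response received from the model.")
    return content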
|
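
The chatbot handler additionally retries on openai.error.RateLimitError, waiting base_wait * 2 ** retry_count seconds between attempts for up to five tries. The same backoff could be factored into a reusable helper; the following is a sketch under the same assumptions (openai 0.x-style API), and the helper name create_with_backoff is hypothetical, not part of app.py.

import logging
import time

import openai

logger = logging.getLogger(__name__)

def create_with_backoff(max_retries: int = 5, base_wait: float = 1.0, **create_kwargs):
    """Call openai.ChatCompletion.create, retrying rate-limit errors with exponential backoff."""
    for retry_count in range(1, max_retries + 1):
        try:
            return openai.ChatCompletion.create(**create_kwargs)
        except openai.error.RateLimitError:
            wait_time = base_wait * (2 ** retry_count)  # 2s, 4s, 8s, ... as in the commit
            logger.warning(
                "Rate limit reached. Waiting for %s seconds before retrying... (Attempt %s/%s)",
                wait_time, retry_count, max_retries,
            )
            time.sleep(wait_time)
    raise RuntimeError("Rate limit retries exhausted; please try again later.")

A caller would pass the same arguments the commit uses, e.g. create_with_backoff(model='llama-3.1-70b-versatile', messages=[{"role": "user", "content": prompt}], max_tokens=300, temperature=0.7).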