siddhartharya committed · Commit 6e14c3b · verified · 1 Parent(s): 34c2fad

Update app.py

Files changed (1)
  1. app.py +52 -26
app.py CHANGED
@@ -15,7 +15,6 @@ import sys
 import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
-from ratelimiter import RateLimiter  # Optional
 
 # Import OpenAI library
 import openai
@@ -83,8 +82,8 @@ if not GROQ_API_KEY:
 openai.api_key = GROQ_API_KEY
 openai.api_base = "https://api.groq.com/openai/v1"  # Ensure this is the correct base URL
 
-# Initialize rate limiter (optional, adjust based on rate limits)
-llm_rate_limiter = RateLimiter(max_calls=20, period=60)  # Example: 20 calls per minute
+# Initialize semaphore for rate limiting (allowing 1 concurrent API call)
+api_semaphore = threading.Semaphore(1)
 
 # Global variables for models to enable lazy loading
 embedding_model = None
@@ -237,8 +236,10 @@ Summary: [Your summary]
 Category: [One category]
 """
 
-        # Call the LLM via Groq Cloud API with rate limiting
-        with llm_rate_limiter:
+        # Acquire semaphore before making API call
+        api_semaphore.acquire()
+        try:
+            # Call the LLM via Groq Cloud API with exponential backoff
             response = openai.ChatCompletion.create(
                 model='llama-3.1-70b-versatile',  # Ensure this is the correct model name
                 messages=[
@@ -247,6 +248,10 @@ Category: [One category]
                 max_tokens=150,
                 temperature=0.5,
             )
+        finally:
+            # Release semaphore after API call
+            api_semaphore.release()
+
         content = response['choices'][0]['message']['content'].strip()
         if not content:
             raise ValueError("Empty response received from the model.")
@@ -706,25 +711,46 @@ Bookmarks:
 Provide a concise and helpful response.
 """
 
-        # Call the LLM via Groq Cloud API with rate limiting
-        with llm_rate_limiter:
-            response = openai.ChatCompletion.create(
-                model='llama-3.1-70b-versatile',  # Ensure this is the correct model name
-                messages=[
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=300,
-                temperature=0.7,
-            )
-            answer = response['choices'][0]['message']['content'].strip()
-            logger.info("Chatbot response generated")
-            return chat_history + [{"role": "user", "content": user_query}, {"role": "assistant", "content": answer}]
-
-    except openai.error.RateLimitError as e:
-        wait_time = int(e.headers.get("Retry-After", 5))
-        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
-        time.sleep(wait_time)
-        return chatbot_response(user_query, chat_history, state_bookmarks)  # Retry after waiting
+        # Acquire semaphore before making API call
+        api_semaphore.acquire()
+        try:
+            # Call the LLM via Groq Cloud API with exponential backoff
+            max_retries = 5
+            retry_count = 0
+            base_wait = 1  # Initial wait time in seconds
+
+            while retry_count < max_retries:
+                try:
+                    response = openai.ChatCompletion.create(
+                        model='llama-3.1-70b-versatile',  # Ensure this is the correct model name
+                        messages=[
+                            {"role": "user", "content": prompt}
+                        ],
+                        max_tokens=300,
+                        temperature=0.7,
+                    )
+                    answer = response['choices'][0]['message']['content'].strip()
+                    logger.info("Chatbot response generated")
+                    return chat_history + [{"role": "user", "content": user_query}, {"role": "assistant", "content": answer}]
+                except openai.error.RateLimitError as e:
+                    retry_count += 1
+                    wait_time = base_wait * (2 ** retry_count)  # Exponential backoff
+                    logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
+                    time.sleep(wait_time)
+                except Exception as e:
+                    error_message = f"⚠️ Error processing your query: {str(e)}"
+                    logger.error(error_message, exc_info=True)
+                    return chat_history + [{"role": "assistant", "content": error_message}]
+
+            # If max retries reached
+            error_message = "⚠️ Unable to process your query at the moment. Please try again later."
+            logger.error(error_message)
+            return chat_history + [{"role": "assistant", "content": error_message}]
+
+        finally:
+            # Release semaphore after API call
+            api_semaphore.release()
+
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)
@@ -914,8 +940,8 @@ Navigate through the tabs to explore each feature in detail.
         logger.info("Launching Gradio app")
         demo.launch(debug=True)
     except Exception as e:
-        logger.error(f"Error building the app: {e}", exc_info=True)
-        print(f"Error building the app: {e}")
+        logger.error(f"Error building Gradio app: {e}", exc_info=True)
+        print(f"Error building Gradio app: {e}")
 
 if __name__ == "__main__":
     build_app()
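The semaphore pattern this commit introduces generalizes to any shared client: acquire before the call, release in a finally block so an exception cannot strand the permit. A minimal standalone sketch of that pattern, assuming the pre-1.0 openai Python SDK interface this file uses (openai.ChatCompletion.create); the call_llm helper name is illustrative, not part of the repository:

import threading
import openai

# Allow only one in-flight Groq API call at a time, as in the commit.
api_semaphore = threading.Semaphore(1)

def call_llm(prompt):
    # Hypothetical helper for illustration only.
    api_semaphore.acquire()
    try:
        # The call may raise; the finally below still releases the permit.
        response = openai.ChatCompletion.create(
            model='llama-3.1-70b-versatile',
            messages=[{"role": "user", "content": prompt}],
            max_tokens=150,
            temperature=0.5,
        )
    finally:
        # Release whether the call succeeded or failed.
        api_semaphore.release()
    return response['choices'][0]['message']['content'].strip()

Note that threading.Semaphore also supports the context-manager protocol, so the acquire/try/finally sequence can be written more compactly as a with api_semaphore: block.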
 
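Similarly, the exponential-backoff loop added to chatbot_response could be factored into a reusable helper. A sketch under the same assumptions (pre-1.0 openai SDK, where rate-limit failures raise openai.error.RateLimitError); call_with_backoff is a hypothetical name, not part of this commit:

import time
import logging
import openai

logger = logging.getLogger(__name__)

def call_with_backoff(max_retries=5, base_wait=1, **kwargs):
    # Hypothetical helper: retry openai.ChatCompletion.create on rate
    # limits, doubling the wait each attempt (2s, 4s, 8s, ...).
    for retry_count in range(1, max_retries + 1):
        try:
            return openai.ChatCompletion.create(**kwargs)
        except openai.error.RateLimitError:
            wait_time = base_wait * (2 ** retry_count)
            logger.warning(f"Rate limit reached. Waiting {wait_time}s before retrying (attempt {retry_count}/{max_retries})")
            time.sleep(wait_time)
    raise RuntimeError("Rate limit retries exhausted")

One consequence of this shape, shared with the commit's inline loop: the final rate-limited attempt still sleeps before retries are declared exhausted. Adding random jitter to wait_time would also help desynchronize concurrent retriers.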