siddhartharya committed
Commit 2ff005a · verified · 1 parent: 97165e2

Update app.py

Files changed (1): app.py (+50 -21)
app.py CHANGED
@@ -15,6 +15,7 @@ import sys
 import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
+from collections import deque

 # Import OpenAI library
 import openai
@@ -87,6 +88,44 @@ openai.api_base = "https://api.groq.com/openai/v1"
 api_lock = threading.Lock()
 last_api_call_time = 0

+# Rate Limiter Configuration
+RPM_LIMIT = 30      # Requests per minute
+TPM_LIMIT = 40000   # Tokens per minute
+
+# Implementing a Token Bucket Rate Limiter
+class TokenBucket:
+    def __init__(self, rate, capacity):
+        self.rate = rate  # tokens per second
+        self.capacity = capacity
+        self.tokens = capacity
+        self.timestamp = time.time()
+        self.lock = threading.Lock()
+
+    def consume(self, tokens=1):
+        with self.lock:
+            now = time.time()
+            elapsed = now - self.timestamp
+            # Refill tokens
+            refill = elapsed * self.rate
+            self.tokens = min(self.capacity, self.tokens + refill)
+            self.timestamp = now
+            if self.tokens >= tokens:
+                self.tokens -= tokens
+                return True
+            else:
+                return False
+
+    def wait_for_token(self, tokens=1):
+        while not self.consume(tokens):
+            time.sleep(0.1)
+
+# Initialize rate limiters
+rpm_rate = RPM_LIMIT / 60   # tokens per second
+tpm_rate = TPM_LIMIT / 60   # tokens per second
+
+rpm_bucket = TokenBucket(rate=rpm_rate, capacity=RPM_LIMIT)
+tpm_bucket = TokenBucket(rate=tpm_rate, capacity=TPM_LIMIT)
+
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
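The new TokenBucket is a standard token-bucket limiter: the bucket refills continuously at `rate` tokens per second up to `capacity`, `consume` takes tokens when enough are available, and `wait_for_token` polls in 0.1 s steps until it succeeds. A minimal standalone sketch of the behavior (the class is condensed from the hunk above; the rate and capacity numbers here are illustrative only, not the app's limits):

import threading
import time

class TokenBucket:
    def __init__(self, rate, capacity):
        self.rate = rate          # refill rate, tokens per second
        self.capacity = capacity  # maximum burst size
        self.tokens = capacity
        self.timestamp = time.time()
        self.lock = threading.Lock()

    def consume(self, tokens=1):
        with self.lock:
            now = time.time()
            # Refill proportionally to elapsed time, capped at capacity
            self.tokens = min(self.capacity, self.tokens + (now - self.timestamp) * self.rate)
            self.timestamp = now
            if self.tokens >= tokens:
                self.tokens -= tokens
                return True
            return False

    def wait_for_token(self, tokens=1):
        while not self.consume(tokens):
            time.sleep(0.1)

bucket = TokenBucket(rate=1.0, capacity=2)
start = time.time()
for i in range(4):
    bucket.wait_for_token()
    print(f"request {i} admitted at t={time.time() - start:.1f}s")
# With rate=1.0 and capacity=2: the first two calls pass immediately
# (the initial burst), then later calls are paced about 1 s apart.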
@@ -169,16 +208,11 @@ def generate_summary_and_assign_category(bookmark):

     while retry_count < max_retries:
         try:
-            # Rate Limiting Logic
-            with api_lock:
-                global last_api_call_time
-                current_time = time.time()
-                elapsed = current_time - last_api_call_time
-                if elapsed < 2:
-                    sleep_duration = 2 - elapsed
-                    logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
-                    time.sleep(sleep_duration)
-                last_api_call_time = time.time()
+            # Rate Limiting
+            rpm_bucket.wait_for_token()
+            # Estimate tokens: prompt + max_tokens
+            # Here, we assume max_tokens=150
+            tpm_bucket.wait_for_token(tokens=150)

             html_content = bookmark.get('html_content', '')
             soup = BeautifulSoup(html_content, 'html.parser')
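Note that `tpm_bucket.wait_for_token(tokens=150)` reserves only the completion budget (max_tokens=150); the prompt's own tokens are not counted against the TPM bucket. A possible refinement, not part of this commit, using the rough 4-characters-per-token heuristic for English text (the estimate_request_tokens helper is hypothetical):

# Hypothetical refinement (not in the commit): reserve prompt + completion
# tokens instead of a flat 150. The 4-chars-per-token divisor is a crude
# approximation, not an exact tokenizer count.
def estimate_request_tokens(prompt: str, max_tokens: int = 150) -> int:
    prompt_tokens = len(prompt) // 4 + 1
    return prompt_tokens + max_tokens

# Usage at the call site would then be:
# tpm_bucket.wait_for_token(tokens=estimate_request_tokens(prompt))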
@@ -447,7 +481,7 @@ def display_bookmarks():
             # For dead links, use 'summary' if available
             summary = bookmark.get('summary', '')
             if not summary:
-                # Optionally, you can skip setting summary or provide a default message
+                # Provide a default message or leave it empty
                 summary = 'No summary available.'
         elif bookmark.get('slow_link'):
             status = "⏳ Slow Response"
@@ -526,7 +560,7 @@ def process_uploaded_file(file, state_bookmarks):

     # Process bookmarks concurrently with LLM calls
     logger.info("Processing bookmarks with LLM concurrently")
-    with ThreadPoolExecutor(max_workers=1) as executor:
+    with ThreadPoolExecutor(max_workers=5) as executor:
         executor.map(generate_summary_and_assign_category, bookmarks)

     try:
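Raising max_workers from 1 to 5 is safe here because every worker blocks on the same shared buckets, so throughput stays capped by RPM_LIMIT and TPM_LIMIT rather than by the worker count. An illustrative sketch (reuses the TokenBucket class from the previous snippet; the numbers are made up):

from concurrent.futures import ThreadPoolExecutor
import time

bucket = TokenBucket(rate=0.5, capacity=1)  # one admission per 2 s, burst of 1
start = time.time()

def fake_llm_call(i):
    bucket.wait_for_token()
    print(f"task {i} admitted at t={time.time() - start:.1f}s")

with ThreadPoolExecutor(max_workers=5) as executor:
    list(executor.map(fake_llm_call, range(5)))
# Admissions arrive roughly 2 s apart even though all five tasks start at once.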
@@ -654,15 +688,9 @@ def chatbot_response(user_query, chat_history):
     try:
         chat_history.append({"role": "user", "content": user_query})

-        with api_lock:
-            global last_api_call_time
-            current_time = time.time()
-            elapsed = current_time - last_api_call_time
-            if elapsed < 2:
-                sleep_duration = 2 - elapsed
-                logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
-                time.sleep(sleep_duration)
-            last_api_call_time = time.time()
+        # Rate Limiting
+        rpm_bucket.wait_for_token()
+        tpm_bucket.wait_for_token(tokens=300)  # Assuming max_tokens=300

         query_vector = embedding_model.encode([user_query]).astype('float32')
         k = 5
@@ -670,6 +698,7 @@ def chatbot_response(user_query, chat_history):
         ids = ids.flatten()

         id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
+        # Filter out bookmarks without summaries
        matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark and id_to_bookmark.get(id).get('summary')]

         if not matching_bookmarks:
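The added filter drops search hits whose bookmark has no usable summary (e.g., dead links) before they reach the chat context. A toy illustration with made-up data (it assumes the FAISS ids line up with each bookmark's 'id' field, as in the app):

# Toy data: one bookmark has an empty summary and should be filtered out.
bookmarks = [
    {'id': 0, 'title': 'A', 'summary': 'Post about FAISS.'},
    {'id': 1, 'title': 'B', 'summary': ''},   # dead link, no summary
    {'id': 2, 'title': 'C', 'summary': 'Rate limiting.'},
]
ids = [2, 1, 0]  # as returned by index.search, flattened

id_to_bookmark = {b['id']: b for b in bookmarks}
matching = [id_to_bookmark[i] for i in ids
            if i in id_to_bookmark and id_to_bookmark[i].get('summary')]
print([b['title'] for b in matching])  # ['C', 'A'] ('B' is filtered out)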
 