Update app.py
app.py
CHANGED
@@ -15,6 +15,7 @@ import sys
 import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
+from collections import deque
 
 # Import OpenAI library
 import openai
@@ -87,6 +88,44 @@ openai.api_base = "https://api.groq.com/openai/v1"
 api_lock = threading.Lock()
 last_api_call_time = 0
 
+# Rate Limiter Configuration
+RPM_LIMIT = 30  # Requests per minute
+TPM_LIMIT = 40000  # Tokens per minute
+
+# Implementing a Token Bucket Rate Limiter
+class TokenBucket:
+    def __init__(self, rate, capacity):
+        self.rate = rate  # tokens per second
+        self.capacity = capacity
+        self.tokens = capacity
+        self.timestamp = time.time()
+        self.lock = threading.Lock()
+
+    def consume(self, tokens=1):
+        with self.lock:
+            now = time.time()
+            elapsed = now - self.timestamp
+            # Refill tokens
+            refill = elapsed * self.rate
+            self.tokens = min(self.capacity, self.tokens + refill)
+            self.timestamp = now
+            if self.tokens >= tokens:
+                self.tokens -= tokens
+                return True
+            else:
+                return False
+
+    def wait_for_token(self, tokens=1):
+        while not self.consume(tokens):
+            time.sleep(0.1)
+
+# Initialize rate limiters
+rpm_rate = RPM_LIMIT / 60  # tokens per second
+tpm_rate = TPM_LIMIT / 60  # tokens per second
+
+rpm_bucket = TokenBucket(rate=rpm_rate, capacity=RPM_LIMIT)
+tpm_bucket = TokenBucket(rate=tpm_rate, capacity=TPM_LIMIT)
+
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
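Reviewer note: the TokenBucket added above is self-contained, so its refill behaviour can be sanity-checked outside the app. A minimal sketch, assuming the class definition from this hunk is in scope; it only needs the standard-library time module:

import time  # already imported in app.py; repeated so the sketch runs standalone

bucket = TokenBucket(rate=2, capacity=2)  # 2 permits per second, burst of 2

start = time.time()
for i in range(3):
    bucket.wait_for_token()
    # The first two iterations pass immediately; the third should block
    # for roughly half a second while the bucket refills.
    print(f"call {i} permitted after {time.time() - start:.2f}s")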
@@ -169,16 +208,11 @@ def generate_summary_and_assign_category(bookmark):
 
     while retry_count < max_retries:
         try:
-            # Rate Limiting
-
-
-
-
-            if elapsed < 2:
-                sleep_duration = 2 - elapsed
-                logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
-                time.sleep(sleep_duration)
-            last_api_call_time = time.time()
+            # Rate Limiting
+            rpm_bucket.wait_for_token()
+            # Estimate tokens: prompt + max_tokens
+            # Here, we assume max_tokens=150
+            tpm_bucket.wait_for_token(tokens=150)
 
             html_content = bookmark.get('html_content', '')
             soup = BeautifulSoup(html_content, 'html.parser')
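Reviewer note: the fixed tokens=150 mirrors the completion budget but ignores the size of the prompt itself. A possible refinement, sketched with a hypothetical helper that is not part of app.py; the 4-characters-per-token estimate is a rough heuristic, and the pre-1.0 openai client is assumed because the file configures openai.api_base:

def rate_limited_completion(messages, model, max_tokens=150):
    # Crude prompt-size estimate: ~4 characters per token, plus the completion budget.
    prompt_chars = sum(len(m["content"]) for m in messages)
    estimated_tokens = prompt_chars // 4 + max_tokens
    rpm_bucket.wait_for_token()                          # one request against the RPM bucket
    tpm_bucket.wait_for_token(tokens=estimated_tokens)   # estimated tokens against the TPM bucket
    return openai.ChatCompletion.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
    )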
@@ -447,7 +481,7 @@ def display_bookmarks():
             # For dead links, use 'summary' if available
             summary = bookmark.get('summary', '')
             if not summary:
-                #
+                # Provide a default message or leave it empty
                 summary = 'No summary available.'
         elif bookmark.get('slow_link'):
             status = "⏳ Slow Response"
@@ -526,7 +560,7 @@ def process_uploaded_file(file, state_bookmarks):
 
     # Process bookmarks concurrently with LLM calls
     logger.info("Processing bookmarks with LLM concurrently")
-    with ThreadPoolExecutor(max_workers=
+    with ThreadPoolExecutor(max_workers=5) as executor:
         executor.map(generate_summary_and_assign_category, bookmarks)
 
     try:
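Reviewer note: executor.map only surfaces worker exceptions when its result iterator is consumed, so a bookmark that raises inside generate_summary_and_assign_category fails silently here. A hedged alternative, not what this commit does, using submit plus as_completed; it assumes the module-level logger app.py already configures and that each bookmark dict carries a url key:

from concurrent.futures import ThreadPoolExecutor, as_completed

with ThreadPoolExecutor(max_workers=5) as executor:
    # Map each future back to its bookmark so failures can be attributed.
    futures = {executor.submit(generate_summary_and_assign_category, b): b for b in bookmarks}
    for future in as_completed(futures):
        bookmark = futures[future]
        try:
            future.result()  # re-raises any exception from the worker thread
        except Exception as exc:
            logger.error(f"Failed to process bookmark {bookmark.get('url', 'unknown')}: {exc}")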
@@ -654,15 +688,9 @@ def chatbot_response(user_query, chat_history):
     try:
         chat_history.append({"role": "user", "content": user_query})
 
-
-
-
-        elapsed = current_time - last_api_call_time
-        if elapsed < 2:
-            sleep_duration = 2 - elapsed
-            logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
-            time.sleep(sleep_duration)
-        last_api_call_time = time.time()
+        # Rate Limiting
+        rpm_bucket.wait_for_token()
+        tpm_bucket.wait_for_token(tokens=300)  # Assuming max_tokens=300
 
         query_vector = embedding_model.encode([user_query]).astype('float32')
         k = 5
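Reviewer note: ids in the next hunk comes from the vector search started just above. For context, a minimal sketch of that retrieval step under stated assumptions: faiss_index names a FAISS index built over the bookmark embeddings elsewhere in app.py (the name is assumed), and embedding_model is the sentence-transformers model the file already loads:

def retrieve_candidate_ids(user_query, k=5):
    query_vector = embedding_model.encode([user_query]).astype('float32')
    distances, ids = faiss_index.search(query_vector, k)  # both arrays have shape (1, k)
    ids = ids.flatten()
    return [int(i) for i in ids if i != -1]  # FAISS pads missing neighbours with -1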
@@ -670,6 +698,7 @@ def chatbot_response(user_query, chat_history):
         ids = ids.flatten()
 
         id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
+        # Filter out bookmarks without summaries
         matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark and id_to_bookmark.get(id).get('summary')]
 
         if not matching_bookmarks: