Update app.py
app.py
CHANGED
@@ -1,13 +1,13 @@
 import gradio as gr
 import os
-import openai
+import aiohttp
 import tenacity
 import nest_asyncio
+import asyncio
 
 nest_asyncio.apply()
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
-openai.api_key = ACCESS_TOKEN
 
 # Retry logic with tenacity for handling API rate limits
 @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
@@ -23,39 +23,26 @@ async def respond(
     messages = [{"role": "system", "content": system_message},
                 {"role": "user", "content": message}]
 
-
-
-
-
-
-
-
-
-
-
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
+                json={
+                    "model": "NousResearch/Hermes-3-Llama-3.1-8B",
+                    "max_tokens": max_tokens,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "messages": messages,
+                    "stream": True,
+                },
+            ) as response:
+                response.raise_for_status()
+                response_text = await response.text()
+                return response_text
 
-
-
-
-            token = chunk['choices'][0]['delta']['content']
-            response += token
-
-        return response
-
-    except openai.error.APIError as e:
-        # Handle both string and dict types of error bodies
-        error_details = e.body
-        if isinstance(error_details, dict):
-            error_type = error_details.get("type", "Unknown")
-            error_code = error_details.get("code", "Unknown")
-            error_param = error_details.get("param", "Unknown")
-            error_message = error_details.get("message", "An error occurred.")
-            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
-        else:
-            error_str = f"Error: {error_details}"
-
-        print(f"APIError: {error_str}")
-        return error_str
+    except aiohttp.ClientError as e:
+        print(f"ClientError: {e}")
+        return "Error occurred. Please try again."
 
     except Exception as e:
         print(f"Exception: {e}")
@@ -64,7 +51,6 @@ async def respond(
 
 # Gradio function to handle user input and response generation without history
 def generate_response(message, system_message, max_tokens, temperature, top_p):
-    import asyncio
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     response = loop.run_until_complete(respond(message, system_message, max_tokens, temperature, top_p))
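Note that the new request body sets "stream": True, yet the handler reads the whole body with await response.text(), so the value returned to Gradio is raw server-sent-events framing (data: {...} lines) rather than assembled completion text. A minimal sketch of consuming the stream incrementally instead, assuming an OpenAI-compatible SSE format; the helper name read_stream is illustrative and not part of the commit:

import json

import aiohttp


async def read_stream(response: aiohttp.ClientResponse) -> str:
    """Assemble the completion from OpenAI-style SSE lines (hypothetical helper)."""
    text = ""
    async for raw_line in response.content:  # aiohttp's StreamReader yields bytes lines
        line = raw_line.decode("utf-8").strip()
        if not line.startswith("data: "):
            continue  # skip blank separators and keep-alives
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break  # end-of-stream sentinel in the OpenAI SSE format
        chunk = json.loads(payload)
        # Mirrors the removed SDK code: chunk['choices'][0]['delta']['content']
        text += chunk["choices"][0]["delta"].get("content", "")
    return text

The delta access deliberately mirrors the token accumulation in the removed OpenAI-SDK branch of the diff.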
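A related caveat: tenacity only retries when the decorated function raises, and the commit keeps the rate-limit retry decorator on respond, but the new except aiohttp.ClientError handler converts HTTP failures (including the 429s that raise_for_status() surfaces as aiohttp.ClientResponseError) into a plain return value, so the exponential backoff never triggers. A hedged sketch of letting those errors propagate to the decorator; post_chat and its parameters are illustrative, not the app's actual structure:

import aiohttp
import tenacity


@tenacity.retry(
    # Retry only the HTTP failures (429/5xx) that raise_for_status() raises.
    retry=tenacity.retry_if_exception_type(aiohttp.ClientResponseError),
    wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
    stop=tenacity.stop_after_attempt(5),
)
async def post_chat(session: aiohttp.ClientSession, url: str, headers: dict, payload: dict) -> str:
    async with session.post(url, headers=headers, json=payload) as response:
        response.raise_for_status()  # propagate so tenacity can back off and retry
        return await response.text()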