Update app.py
app.py CHANGED
@@ -1,15 +1,10 @@
 import gradio as gr
-from openai import OpenAI, APIError
 import os
+import openai
 import tenacity
-import asyncio
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
-
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1/",
-    api_key=ACCESS_TOKEN,
-)
+openai.api_key = ACCESS_TOKEN
 
 # Retry logic with tenacity for handling API rate limits
 @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
@@ -27,24 +22,24 @@ async def respond(
 
         response = ""
         # Properly stream chat completions using dot notation
-        stream = client.chat.completions.create(
+        stream = openai.ChatCompletion.create(
             model="NousResearch/Hermes-3-Llama-3.1-8B",
             max_tokens=max_tokens,
-            stream=True,
             temperature=temperature,
             top_p=top_p,
             messages=messages,
+            stream=True,
         )
 
         # Stream response and concatenate tokens
         for chunk in stream:
-            if chunk.choices[0].delta.content:
-                token = chunk.choices[0].delta.content
+            if 'choices' in chunk and 'delta' in chunk['choices'][0] and 'content' in chunk['choices'][0]['delta']:
+                token = chunk['choices'][0]['delta']['content']
                 response += token
 
         return response
 
-    except APIError as e:
+    except openai.error.APIError as e:
         # Handle both string and dict types of error bodies
         error_details = e.body
         if isinstance(error_details, dict):
@@ -95,4 +90,4 @@ def launch_app():
     print("Please try again.")
 
 if __name__ == "__main__":
-    launch_app()
+    launch_app()
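
For reference, the updated app.py targets the legacy openai Python SDK (pre-1.0), where openai.ChatCompletion.create(..., stream=True) yields dict-like chunks and errors live under openai.error. Below is a minimal, self-contained sketch of the same streaming-plus-retry pattern under that assumption; the stream_reply helper name, its default parameters, and the __main__ example are illustrative and not part of the commit.

import os

import openai
import tenacity

# Key handling mirrors the updated app.py: read the token from the
# environment and set it on the module-level client.
openai.api_key = os.getenv("HF_TOKEN")
# The previous version pointed at the Hugging Face Inference API; with the
# legacy SDK an OpenAI-compatible endpoint can optionally be set like this:
# openai.api_base = "https://api-inference.huggingface.co/v1/"

# Exponential backoff for transient API failures, as in app.py's decorator.
@tenacity.retry(
    wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
    stop=tenacity.stop_after_attempt(5),
)
def stream_reply(messages, max_tokens=512, temperature=0.7, top_p=0.95):
    """Stream a chat completion and return the concatenated text (illustrative helper)."""
    response = ""
    try:
        stream = openai.ChatCompletion.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
            stream=True,
        )
        # Pre-1.0 chunks behave like dicts; some deltas carry only a role,
        # so guard before concatenating content.
        for chunk in stream:
            delta = chunk["choices"][0].get("delta", {})
            token = delta.get("content")
            if token:
                response += token
        return response
    except openai.error.APIError as e:
        # Surface the error; re-raising lets tenacity decide whether to retry.
        print(f"API error: {e}")
        raise

if __name__ == "__main__":
    print(stream_reply([{"role": "user", "content": "Hello!"}]))

Using .get with a default serves the same purpose as the 'in' checks added in this commit: role-only or empty chunks are skipped so that None is never concatenated into the response.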