Jyothikamalesh committed on
Commit
1b9ab22
·
verified ·
1 Parent(s): efe8c50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -13
app.py CHANGED
@@ -1,15 +1,10 @@
1
  import gradio as gr
2
- from openai import OpenAI, APIError
3
  import os
 
4
  import tenacity
5
- import asyncio
6
 
7
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
8
-
9
- client = OpenAI(
10
- base_url="https://api-inference.huggingface.co/v1/",
11
- api_key=ACCESS_TOKEN,
12
- )
13
 
14
  # Retry logic with tenacity for handling API rate limits
15
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
@@ -27,24 +22,24 @@ async def respond(
27
 
28
  response = ""
29
  # Properly stream chat completions using dot notation
30
- stream = client.chat.completions.create(
31
  model="NousResearch/Hermes-3-Llama-3.1-8B",
32
  max_tokens=max_tokens,
33
- stream=True,
34
  temperature=temperature,
35
  top_p=top_p,
36
  messages=messages,
 
37
  )
38
 
39
  # Stream response and concatenate tokens
40
  for chunk in stream:
41
- if hasattr(chunk.choices[0].delta, 'content'):
42
- token = chunk.choices[0].delta.content
43
  response += token
44
 
45
  return response
46
 
47
- except APIError as e:
48
  # Handle both string and dict types of error bodies
49
  error_details = e.body
50
  if isinstance(error_details, dict):
@@ -95,4 +90,4 @@ def launch_app():
95
  print("Please try again.")
96
 
97
  if __name__ == "__main__":
98
- launch_app()
 
1
  import gradio as gr
 
2
  import os
3
+ import openai
4
  import tenacity
 
5
 
6
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
7
+ openai.api_key = ACCESS_TOKEN
 
 
 
 
8
 
9
  # Retry logic with tenacity for handling API rate limits
10
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
 
22
 
23
  response = ""
24
  # Properly stream chat completions using dot notation
25
+ stream = openai.ChatCompletion.create(
26
  model="NousResearch/Hermes-3-Llama-3.1-8B",
27
  max_tokens=max_tokens,
 
28
  temperature=temperature,
29
  top_p=top_p,
30
  messages=messages,
31
+ stream=True,
32
  )
33
 
34
  # Stream response and concatenate tokens
35
  for chunk in stream:
36
+ if 'choices' in chunk and 'delta' in chunk['choices'][0] and 'content' in chunk['choices'][0]['delta']:
37
+ token = chunk['choices'][0]['delta']['content']
38
  response += token
39
 
40
  return response
41
 
42
+ except openai.error.APIError as e:
43
  # Handle both string and dict types of error bodies
44
  error_details = e.body
45
  if isinstance(error_details, dict):
 
90
  print("Please try again.")
91
 
92
  if __name__ == "__main__":
93
+ launch_app()