Jyothikamalesh committed on
Commit
18d6e67
·
verified ·
1 Parent(s): d2bcd0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -24
app.py CHANGED
@@ -1,8 +1,7 @@
1
- #refer llama recipes for more info https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
2
- #huggingface-llama-recipes : https://github.com/huggingface/huggingface-llama-recipes/tree/main
3
  import gradio as gr
4
- from openai import OpenAI
5
  import os
 
6
 
7
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
8
 
@@ -11,6 +10,7 @@ client = OpenAI(
11
  api_key=ACCESS_TOKEN,
12
  )
13
 
 
14
  def respond(
15
  message,
16
  history: list[tuple[str, str]],
@@ -19,31 +19,49 @@ def respond(
19
  temperature,
20
  top_p,
21
  ):
22
- messages = [{"role": "system", "content": system_message}]
 
23
 
24
- for val in history:
25
- if val[0]:
26
- messages.append({"role": "user", "content": val[0]})
27
- if val[1]:
28
- messages.append({"role": "assistant", "content": val[1]})
29
 
30
- messages.append({"role": "user", "content": message})
31
 
32
- response = ""
33
-
34
- for message in client.chat.completions.create(
35
- model="NousResearch/Hermes-3-Llama-3.1-8B",
36
- max_tokens=max_tokens,
37
- stream=True,
38
- temperature=temperature,
39
- top_p=top_p,
40
- messages=messages,
41
- ):
42
- token = message.choices[0].delta.content
43
-
44
- response += token
45
- yield response
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  chatbot = gr.Chatbot(height=600)
48
 
49
  demo = gr.ChatInterface(
 
 
 
1
  import gradio as gr
2
+ from openai import OpenAI, APIError
3
  import os
4
+ import tenacity
5
 
6
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
7
 
 
10
  api_key=ACCESS_TOKEN,
11
  )
12
 
13
+ @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10))
14
  def respond(
15
  message,
16
  history: list[tuple[str, str]],
 
19
  temperature,
20
  top_p,
21
  ):
22
+ try:
23
+ messages = [{"role": "system", "content": system_message}]
24
 
25
+ for val in history:
26
+ if val[0]:
27
+ messages.append({"role": "user", "content": val[0]})
28
+ if val[1]:
29
+ messages.append({"role": "assistant", "content": val[1]})
30
 
31
+ messages.append({"role": "user", "content": message})
32
 
33
+ response = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ for message in client.chat.completions.create(
36
+ model="NousResearch/Hermes-3-Llama-3.1-8B",
37
+ max_tokens=max_tokens,
38
+ stream=True,
39
+ temperature=temperature,
40
+ top_p=top_p,
41
+ messages=messages,
42
+ ):
43
+ token = message.choices[0].delta.content
44
+
45
+ response += token
46
+ yield response
47
+ except APIError as e:
48
+ error_details = e.body
49
+ error_type = error_details.get("type")
50
+ error_code = error_details.get("code")
51
+ error_param = error_details.get("param")
52
+ error_message = error_details.get("message")
53
+
54
+ if error_type:
55
+ error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
56
+ else:
57
+ error_str = "An error occurred during streaming"
58
+
59
+ print(f"Error: {error_str}")
60
+ yield error_str
61
+ except Exception as e:
62
+ print(f"Error: {e}")
63
+ yield "Error occurred. Please try again."
64
+
65
  chatbot = gr.Chatbot(height=600)
66
 
67
  demo = gr.ChatInterface(