Jyothikamalesh committed on
Commit
d3a5649
·
verified ·
1 Parent(s): 2efa6f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -85
app.py CHANGED
@@ -1,98 +1,51 @@
1
  import gradio as gr
2
- from openai import OpenAI, APIError
3
- import os
4
- import tenacity
5
- import asyncio
6
 
7
# API token read from the environment; None when HF_TOKEN is unset.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI-style client whose requests are sent to the base URL below.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
13
 
14
# Retry logic with tenacity for handling API rate limits.
# NOTE(review): both except-branches below return a string instead of
# re-raising, so this decorator never observes a failure and never retries.
# NOTE(review): the API call and the stream loop are not awaited, so they run
# synchronously inside this coroutine.
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
async def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Return the model's reply to a single user message (no chat history).

    Args:
        message: The user's message text.
        system_message: Text sent as the system prompt.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        top_p: Nucleus-sampling threshold, forwarded to the API.

    Returns:
        The concatenated streamed reply on success. On failure, a
        human-readable error string is returned instead of raising.
    """
    try:
        # Only use the system message and the current message for the response
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": message},
        ]

        stream = client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )

        # Collect streamed tokens and join once at the end.
        pieces = []
        for chunk in stream:
            # Some chunks carry no choices at all; skip them instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            # `content` exists on every delta but may be None (for example on
            # role-only or final chunks), so test the value, not the attribute.
            token = getattr(chunk.choices[0].delta, "content", None)
            if token:
                pieces.append(token)

        return "".join(pieces)

    except APIError as e:
        # Handle both string and dict types of error bodies
        error_details = e.body
        if isinstance(error_details, dict):
            error_type = error_details.get("type", "Unknown")
            error_code = error_details.get("code", "Unknown")
            error_param = error_details.get("param", "Unknown")
            error_message = error_details.get("message", "An error occurred.")
            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
        else:
            error_str = f"Error: {error_details}"

        print(f"APIError: {error_str}")
        return error_str

    except Exception as e:
        # Last-resort boundary: report a generic message to the UI.
        print(f"Exception: {e}")
        return "Error occurred. Please try again."
65
-
66
-
67
# Async Gradio callback: produces one reply per click, without chat history.
async def generate_response(message, system_message, max_tokens, temperature, top_p):
    """Await `respond` with the given inputs and return its result unchanged."""
    return await respond(message, system_message, max_tokens, temperature, top_p)
71
-
72
-
73
def launch_app():
    """Build the single-turn chatbot UI and launch it.

    A KeyError raised while building or launching the UI is printed rather
    than propagated.
    """
    try:
        with gr.Blocks() as demo:
            gr.Markdown("# Chatbot")

            # Input widgets, created in the order they appear on the page.
            message_box = gr.Textbox(label="Message")
            system_box = gr.Textbox(label="System message")
            max_tokens_slider = gr.Slider(
                minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"
            )
            temperature_slider = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p_slider = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"
            )
            response_box = gr.Text(label="Response")

            # Use the async version of generate_response without history
            generate_button = gr.Button("Generate Response")
            generate_button.click(
                generate_response,
                inputs=[
                    message_box,
                    system_box,
                    max_tokens_slider,
                    temperature_slider,
                    top_p_slider,
                ],
                outputs=[response_box],
                show_progress=False,
            )
        demo.launch(show_error=True)
    except KeyError as e:
        print(f"Error: {e}")
        print("Please try again.")


if __name__ == "__main__":
    launch_app()
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
 
 
 
3
 
4
# Inference client bound to the model that answers every chat request.
client = InferenceClient(model="NousResearch/Hermes-3-Llama-3.1-8B")
6
 
 
 
 
 
7
 
8
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Return the model's reply to the current user message.

    Args:
        message: The user's latest message text.
        history: Accepted as part of the callback signature but not used;
            only the system prompt and the current message are sent.
        system_message: Text sent as the system prompt.
        max_tokens: Upper bound on generated tokens, forwarded to the client.
        temperature: Sampling temperature, forwarded to the client.
        top_p: Nucleus-sampling threshold, forwarded to the client.

    Returns:
        The content of the first choice in the completion.
    """
    # Two-message conversation: system prompt followed by the user's input.
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": message}
    conversation = [system_turn, user_turn]

    # Single non-streaming request; the full reply arrives in one response.
    completion = client.chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        stream=False,
        temperature=temperature,
        top_p=top_p,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content
30
+
31
+
32
# Extra controls passed to the chat interface alongside the message input, in
# the order `respond` takes them after `history`: system message, max tokens,
# temperature, top-p.
_extra_inputs = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

# Chat UI that uses `respond` as its callback.
demo = gr.ChatInterface(fn=respond, additional_inputs=_extra_inputs)


if __name__ == "__main__":
    demo.launch()