Jyothikamalesh commited on
Commit
a5bb25c
·
verified ·
1 Parent(s): dd4cd9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -62
app.py CHANGED
@@ -1,83 +1,56 @@
1
  import gradio as gr
 
2
  import os
3
- import openai
4
- import tenacity
5
  import nest_asyncio
6
- import asyncio
7
 
8
  nest_asyncio.apply()
9
 
10
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
11
- openai.api_key = ACCESS_TOKEN
12
 
13
- # Retry logic with tenacity for handling API rate limits
14
- @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
15
- async def respond(
 
 
 
16
  message,
17
  system_message,
18
  max_tokens,
19
  temperature,
20
  top_p,
21
  ):
 
 
 
 
 
22
  try:
23
- print("Making request to OpenAI API...")
24
- # Only use the system message and the current message for the response
25
- messages = [{"role": "system", "content": system_message},
26
- {"role": "user", "content": message}]
27
-
28
- response = await openai.Completion.create(
29
- model="text-davinci-003",
30
- prompt=system_message + "\n" + message,
31
  max_tokens=max_tokens,
32
  temperature=temperature,
33
- top_p=top_p,
34
  )
35
-
36
- print("Received response from OpenAI API...")
37
- response_text = response.choices[0].text
38
- print("Response text:", response_text)
39
- return response_text
40
-
41
- except openai.error.APIError as e:
42
- print("APIError:", e)
43
- return "Error occurred. Please try again."
44
-
45
  except Exception as e:
46
- print("Exception:", e)
47
- return "Error occurred. Please try again."
48
-
49
-
50
- # Gradio function to handle user input and response generation without history
51
- def generate_response(message, system_message, max_tokens, temperature, top_p):
52
- loop = asyncio.new_event_loop()
53
- asyncio.set_event_loop(loop)
54
- response = loop.run_until_complete(respond(message, system_message, max_tokens, temperature, top_p))
55
- return response
56
-
57
-
58
- def launch_app():
59
- try:
60
- demo = gr.Blocks()
61
- with demo:
62
- gr.Markdown("# Chatbot")
63
- message = gr.Textbox(label="Message")
64
- system_message = gr.Textbox(label="System message")
65
- max_tokens = gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens")
66
- temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
67
- top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
68
- response = gr.Text(label="Response")
69
-
70
- # Use the generate_response function without history
71
- gr.Button("Generate Response").click(
72
- generate_response,
73
- inputs=[message, system_message, max_tokens, temperature, top_p],
74
- outputs=[response],
75
- show_progress=False,
76
- )
77
- demo.launch(show_error=True)
78
- except KeyError as e:
79
- print("Error:", e)
80
- print("Please try again.")
81
 
82
  if __name__ == "__main__":
83
- launch_app()
 
1
  import gradio as gr
2
+ from openai import OpenAI
3
  import os
 
 
4
  import nest_asyncio
 
5
 
6
  nest_asyncio.apply()
7
 
8
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
 
9
 
10
# OpenAI-compatible client pointed at the Hugging Face serverless
# inference endpoint, authenticated with the HF_TOKEN secret read above.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
14
+
15
def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate a single, stateless chat completion for *message*.

    No conversation history is kept: each call sends only the system
    prompt and the current user message.

    Parameters
    ----------
    message : str
        The user's prompt.
    system_message : str
        System prompt steering the assistant's behavior.
    max_tokens : int
        Maximum number of tokens to generate.
    temperature : float
        Sampling temperature.
    top_p : float
        Nucleus-sampling probability mass.

    Returns
    -------
    str
        The assistant's reply, or an ``"Error: ..."`` string if the API
        call fails (so the Gradio UI always has text to display).
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message},
    ]

    try:
        print("Making request to API...")
        # `client` is the module-level OpenAI client configured for the
        # Hugging Face inference endpoint.
        response = client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return response.choices[0].message.content
    except Exception as e:  # broad on purpose: surface any failure in the UI
        # f-string formatting already calls str() on e; the explicit
        # str(e) in the original was redundant. Output is identical.
        print(f"Error: {e}")
        return f"Error: {e}"
40
+
41
# Stateless Gradio UI: every submission is an independent single-turn
# request against `respond` (no chat memory between calls).
_message_box = gr.Textbox(label="Message", lines=4)
_system_box = gr.Textbox(label="System Message", value="You are a helpful assistant.")
_max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, label="Max Tokens")
_temperature_slider = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
_top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")

iface = gr.Interface(
    fn=respond,
    inputs=[
        _message_box,
        _system_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    outputs="text",
    title="Hermes-3-Llama Chat (No Memory)",
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
if __name__ == "__main__":
    # Start the Gradio server only when executed as a script, not on import.
    iface.launch()