Nymbo committed on
Commit
5b1509d
·
verified ·
1 Parent(s): 8beac4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -36
app.py CHANGED
@@ -2,17 +2,15 @@ import gradio as gr
2
  from openai import OpenAI
3
  import os
4
 
5
- # Load the Hugging Face access token from environment variables
6
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
7
-
8
  print("Access token loaded.")
9
 
10
- # Initialize the OpenAI client with Hugging Face's serverless API
11
  client = OpenAI(
12
  base_url="https://api-inference.huggingface.co/v1/",
13
  api_key=ACCESS_TOKEN,
14
  )
15
-
16
  print("OpenAI client initialized.")
17
 
18
  def respond(
@@ -23,79 +21,96 @@ def respond(
23
  temperature,
24
  top_p,
25
  frequency_penalty,
26
- seed,
27
  ):
 
 
 
 
 
 
 
 
 
 
 
 
28
  print(f"Received message: {message}")
29
  print(f"History: {history}")
30
  print(f"System message: {system_message}")
31
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
32
- print(f"Frequency penalty: {frequency_penalty}, Seed: {seed}")
 
 
 
 
33
 
34
- # Construct the messages list for the conversation context
35
  messages = [{"role": "system", "content": system_message}]
36
 
 
37
  for val in history:
38
- if val[0]:
39
- messages.append({"role": "user", "content": val[0]})
40
- print(f"Added user message to context: {val[0]}")
41
- if val[1]:
42
- messages.append({"role": "assistant", "content": val[1]})
43
- print(f"Added assistant message to context: {val[1]}")
 
 
44
 
 
45
  messages.append({"role": "user", "content": message})
46
 
 
47
  response = ""
48
  print("Sending request to OpenAI API.")
49
 
50
- for message in client.chat.completions.create(
51
- model="meta-llama/Llama-3.3-70B-Instruct",
 
52
  max_tokens=max_tokens,
53
- stream=True,
54
  temperature=temperature,
55
  top_p=top_p,
56
- frequency_penalty=frequency_penalty,
57
- seed=seed,
58
  messages=messages,
59
  ):
60
- token = message.choices[0].delta.content
61
- print(f"Received token: {token}")
62
- response += token
 
63
  yield response
64
 
65
  print("Completed response generation.")
66
 
67
- # Initialize the chatbot interface
68
  chatbot = gr.Chatbot(height=600)
69
-
70
  print("Chatbot interface created.")
71
 
72
- # Create the Gradio interface with additional inputs for the new parameters
 
73
  demo = gr.ChatInterface(
74
  respond,
75
  additional_inputs=[
76
  gr.Textbox(value="", label="System message"),
77
- gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
78
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
79
- gr.Slider(
80
- minimum=0.1,
81
- maximum=1.0,
82
- value=0.95,
83
- step=0.05,
84
- label="Top-P",
85
- ),
86
  gr.Slider(
87
  minimum=-2.0,
88
  maximum=2.0,
89
  value=0.0,
90
  step=0.1,
91
- label="Frequency Penalty",
92
  ),
93
  gr.Slider(
94
  minimum=-1,
95
- maximum=2**31 - 1,
96
  value=-1,
97
  step=1,
98
- label="Seed",
99
  ),
100
  ],
101
  fill_height=True,
 
2
from openai import OpenAI
import os

# Read the Hugging Face access token from the environment; the client cannot
# authenticate without it.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    # Surface the misconfiguration at startup instead of letting the first
    # API request fail with an opaque authentication error.
    print("WARNING: HF_TOKEN is not set; API requests will fail to authenticate.")

# OpenAI-compatible client pointed at the Hugging Face serverless Inference API.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
 
16
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed
):
    """
    Stream a chat completion for the user's latest message.

    Parameters:
    - message: the user's new message
    - history: previous turns as (user_msg, assistant_msg) tuples
    - system_message: the system prompt
    - max_tokens: maximum number of tokens to generate in the response
    - temperature: sampling temperature
    - top_p: top-p (nucleus) sampling probability mass
    - frequency_penalty: penalize repeated tokens in the output
    - seed: fixed seed for reproducibility; -1 means 'random'

    Yields the accumulated response text after each streamed chunk, which is
    the incremental-output contract Gradio's ChatInterface expects.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # -1 is the UI sentinel for "random seed"; the API expects None for that.
    if seed == -1:
        seed = None

    # Build the messages array required by the chat-completions API, starting
    # with the system prompt, then replaying the conversation history.
    messages = [{"role": "system", "content": system_message}]
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Append the latest user message.
    messages.append({"role": "user", "content": message})

    # Accumulate the response as tokens stream in.
    response = ""
    print("Sending request to OpenAI API.")

    # Make the streaming request to the HF Inference API via the OpenAI-style client.
    for message_chunk in client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        token_text = message_chunk.choices[0].delta.content
        # BUG FIX: delta.content is None on some chunks (e.g. the role-only
        # first chunk and the final stop chunk of a stream); concatenating it
        # unconditionally raised TypeError. Skip empty deltas.
        if token_text is not None:
            print(f"Received token: {token_text}")
            response += token_text
            yield response

    print("Completed response generation.")
87
 
88
+ # Create a Chatbot component with a specified height
89
  chatbot = gr.Chatbot(height=600)
 
90
  print("Chatbot interface created.")
91
 
92
+ # Create the Gradio ChatInterface
93
+ # We add two new sliders for Frequency Penalty and Seed
94
  demo = gr.ChatInterface(
95
  respond,
96
  additional_inputs=[
97
  gr.Textbox(value="", label="System message"),
98
+ gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
99
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
100
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
 
 
 
 
 
 
101
  gr.Slider(
102
  minimum=-2.0,
103
  maximum=2.0,
104
  value=0.0,
105
  step=0.1,
106
+ label="Frequency Penalty"
107
  ),
108
  gr.Slider(
109
  minimum=-1,
110
+ maximum=65535, # Arbitrary upper limit for demonstration
111
  value=-1,
112
  step=1,
113
+ label="Seed (-1 for random)"
114
  ),
115
  ],
116
  fill_height=True,