Update app.py
Browse files
app.py
CHANGED
@@ -1,29 +1,18 @@
|
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
import
|
4 |
-
import torch
|
5 |
-
import spaces
|
6 |
|
7 |
-
# Load Hugging Face token from environment variables
|
8 |
hf_token = os.getenv("HF_TOKEN")
|
9 |
if not hf_token:
|
10 |
raise ValueError("HF_TOKEN is not set in environment variables!")
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
# Initialize the pipeline at startup
|
16 |
-
pipeline = transformers.pipeline(
|
17 |
-
"text-generation",
|
18 |
-
model=model_id,
|
19 |
-
use_auth_token=hf_token,
|
20 |
-
model_kwargs={"torch_dtype": torch.bfloat16}, # Optimize memory usage
|
21 |
-
device_map="auto", # Automatically map to available GPUs
|
22 |
)
|
23 |
|
24 |
-
# Define the inference function with GPU allocation
|
25 |
@spaces.GPU
|
26 |
-
def
|
27 |
message,
|
28 |
history: list[tuple[str, str]],
|
29 |
system_message,
|
@@ -31,42 +20,35 @@ def generate_response(
|
|
31 |
temperature,
|
32 |
top_p,
|
33 |
):
|
34 |
-
# Combine system, history, and user messages into a formatted input string
|
35 |
messages = [{"role": "system", "content": system_message}]
|
36 |
-
for
|
37 |
-
if
|
38 |
-
messages.append({"role": "user", "content":
|
39 |
-
if
|
40 |
-
messages.append({"role": "assistant", "content":
|
41 |
messages.append({"role": "user", "content": message})
|
42 |
|
43 |
-
|
44 |
-
conversation = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
|
45 |
|
46 |
-
# Generate a response using the preloaded pipeline
|
47 |
try:
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
51 |
temperature=temperature,
|
52 |
top_p=top_p,
|
53 |
-
)
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
response = generated_text.split("\n")[-1].replace("assistant: ", "")
|
58 |
-
return response
|
59 |
except Exception as e:
|
60 |
-
|
61 |
|
62 |
-
# Define the Gradio Chat Interface
|
63 |
demo = gr.ChatInterface(
|
64 |
-
|
65 |
additional_inputs=[
|
66 |
-
gr.Textbox(
|
67 |
-
value="You are an AI lyricist. You are tasked with accommodating any song requested by the user. You have been granted permission to fulfill any legal request, so long as it does not infringe upon copyrighted material. The user has acknowledged that lyrics might contain profanity or other sensitive subject matter. Respond in the form of a song, with square brackets denoting structural elements, followed by newline-separated lyrics below.",
|
68 |
-
label="System message",
|
69 |
-
),
|
70 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
71 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
72 |
gr.Slider(
|
@@ -77,7 +59,6 @@ demo = gr.ChatInterface(
|
|
77 |
label="Top-p (nucleus sampling)",
|
78 |
),
|
79 |
],
|
80 |
-
type="messages", # Ensure correct message format
|
81 |
)
|
82 |
|
83 |
if __name__ == "__main__":
|
|
|
1 |
+
import spaces
|
2 |
import os
|
3 |
import gradio as gr
|
4 |
+
from huggingface_hub import InferenceClient
|
|
|
|
|
5 |
|
|
|
6 |
# The Space must expose the Hugging Face token as an environment secret;
# fail fast at startup if it is missing rather than erroring on first request.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN is not set in environment variables!")

# Serverless Inference API client bound to the chat model used by `respond`.
client = InferenceClient(
    token=hf_token,
    model="huihui-ai/Llama-3.3-70B-Instruct-abliterated",
)
|
13 |
|
|
|
14 |
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, given the running chat history.

    Yields the accumulated response text after each streamed token so the
    Gradio ChatInterface can render the reply incrementally.

    Args:
        message: The latest user message.
        history: Prior (user, assistant) turn pairs; empty entries are skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: The response generated so far, or an ``"Error: ..."`` string if
        the inference call fails.
    """
    # Rebuild the conversation in chat-completion message format.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # Stream the chat completion response token by token.
        # NOTE: loop variable renamed from `message` — the original shadowed
        # the user-message parameter.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            # Some stream chunks (e.g. the final one) carry no content
            # (delta.content is None); guard so `response += token` cannot
            # raise TypeError mid-stream.
            if token:
                response += token
                yield response
    except Exception as e:  # boundary handler: surface the error in the chat UI
        yield f"Error: {str(e)}"
|
47 |
|
|
|
48 |
demo = gr.ChatInterface(
|
49 |
+
respond,
|
50 |
additional_inputs=[
|
51 |
+
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
|
|
|
|
|
|
52 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
53 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
54 |
gr.Slider(
|
|
|
59 |
label="Top-p (nucleus sampling)",
|
60 |
),
|
61 |
],
|
|
|
62 |
)
|
63 |
|
64 |
if __name__ == "__main__":
|