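# Multi-model chat arena: one prompt is sent to up to three Hugging Face
# Inference API models, and each response streams into its own chat panel.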
import gradio as gr
from huggingface_hub import InferenceClient
import os
from itertools import zip_longest
from typing import List, Tuple

# Hugging Face token (read from the environment)
HF_TOKEN = os.getenv("HF_TOKEN")

# Available LLM models
LLM_MODELS = {
    "Mistral": "mistralai/Mistral-7B-Instruct-v0.2",
    "Zephyr": "HuggingFaceH4/zephyr-7b-beta",
    "OpenChat": "openchat/openchat-3.5",
    "Llama2": "meta-llama/Llama-2-7b-chat-hf",
    "Phi": "microsoft/phi-2",
    "Neural": "nvidia/neural-chat-7b-v3-1",
    "Starling": "HuggingFaceH4/starling-lm-7b-alpha"
}

# Default selected models
DEFAULT_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.2",
    "HuggingFaceH4/zephyr-7b-beta",
    "openchat/openchat-3.5"
]

# Initialize clients with token
clients = {
    model: InferenceClient(model, token=HF_TOKEN)
    for model in LLM_MODELS.values()
}
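# The clients are created once at startup and shared by every request; each
# chat call only issues a stateless HTTP request against the hosted model.
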
def process_file(file) -> str:
    """Return the text of an uploaded .txt/.md file, or just its name otherwise."""
    if file is None:
        return ""
    if file.name.endswith(('.txt', '.md')):
        with open(file.name, 'r', encoding='utf-8') as f:
            return f.read()
    return f"Uploaded file: {file.name}"

def respond_single(
    client,
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream one model's reply to `message`, given the prior chat history."""
    system_prefix = """You must answer in Korean. Your role is to provide detailed
explanations and Q&A based on the given content. Explain in a very friendly and
detailed manner."""
    messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for msg in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if hasattr(msg.choices[0].delta, 'content'):
                token = msg.choices[0].delta.content
                if token is not None:
                    response += token
                    yield response
    except Exception as e:
        yield f"Error: {str(e)}"
def respond_all(
    message: str,
    file,
    history1: List[Tuple[str, str]],
    history2: List[Tuple[str, str]],
    history3: List[Tuple[str, str]],
    selected_models: List[str],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Return three response generators, one per selected model."""
    if file:
        file_content = process_file(file)
        message = f"{message}\n\nFile content:\n{file_content}"

    # Pad the selection to exactly three models (falling back to the defaults
    # if nothing is selected) so that every chat panel has a model to answer.
    selected_models = list(selected_models or DEFAULT_MODELS)
    while len(selected_models) < 3:
        selected_models.append(selected_models[-1])
    def generate(client, history):
        return respond_single(
            client,
            message,
            history,
            system_message,
            max_tokens,
            temperature,
            top_p,
        )

    return (
        generate(clients[selected_models[0]], history1),
        generate(clients[selected_models[1]], history2),
        generate(clients[selected_models[2]], history3),
    )

css = """ | |
footer {visibility: hidden} | |
""" | |
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
    with gr.Row():
        model_choices = gr.CheckboxGroup(
            choices=list(LLM_MODELS.values()),
            value=DEFAULT_MODELS,
            label="Select Models (Choose up to 3)",
            interactive=True
        )
    # Plain Chatbot components, one panel per model; the shared textbox below
    # drives all three, so no per-panel input is needed.
    with gr.Row():
        with gr.Column():
            chat1 = gr.Chatbot(height=400, label="Chat 1")
        with gr.Column():
            chat2 = gr.Chatbot(height=400, label="Chat 2")
        with gr.Column():
            chat3 = gr.Chatbot(height=400, label="Chat 3")
    with gr.Row():
        with gr.Column():
            system_message = gr.Textbox(
                value="You are a friendly AI assistant.",
                label="System message"
            )
            max_tokens = gr.Slider(
                minimum=1,
                maximum=8000,
                value=4000,
                step=1,
                label="Max new tokens"
            )
            temperature = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.9,
                step=0.05,
                label="Top-p"
            )
    with gr.Row():
        file_input = gr.File(label="Upload File (optional)")
        msg_input = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
            container=False
        )
    examples = [
        ["Explain the detailed usage in at least 4000 tokens, as if describing it while looking at the screen"],
        ["Write 20 detailed FAQ entries. Use at least 4000 tokens."],
        ["Write a YouTube video script of at least 4000 tokens, focusing on usage, differentiators, features, and strengths"],
        ["Write an SEO-optimized blog post about this service, at least 4000 tokens long"],
        ["Continue your previous answer"],
    ]
    gr.Examples(
        examples=examples,
        inputs=msg_input,
        cache_examples=False
    )
    def submit_message(message, file, history1, history2, history3,
                       models, system_msg, max_tok, temp, tp):
        # Component values must be passed in as event inputs (reading .value
        # at event time does not work); the three streaming generators are
        # then interleaved so all panels update as tokens arrive.
        gen1, gen2, gen3 = respond_all(
            message, file,
            history1 or [], history2 or [], history3 or [],
            models, system_msg, max_tok, temp, tp,
        )
        responses = ["", "", ""]
        for parts in zip_longest(gen1, gen2, gen3):
            for i, part in enumerate(parts):
                if part is not None:
                    responses[i] = part
            yield (
                (history1 or []) + [(message, responses[0])],
                (history2 or []) + [(message, responses[1])],
                (history3 or []) + [(message, responses[2])],
            )

    msg_input.submit(
        submit_message,
        [msg_input, file_input, chat1, chat2, chat3,
         model_choices, system_message, max_tokens, temperature, top_p],
        [chat1, chat2, chat3],
        api_name="submit"
    )

if __name__ == "__main__":
    if not HF_TOKEN:
        print("Warning: HF_TOKEN environment variable is not set")
    demo.launch()
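
# To run locally (assuming this file is saved as app.py):
#   HF_TOKEN=hf_xxx python app.py
# Gradio prints a local URL to open. Gated models such as Llama-2 additionally
# require the token's account to have been granted access to the repository.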