import os

import gradio as gr
from huggingface_hub import InferenceClient

# Set up the Hugging Face Inference API client (reads the HF_TOKEN environment variable)
hf_client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407", token=os.getenv("HF_TOKEN"))
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Forces Korean-language answers: "반드시 한글로 답변할 것." ("You must answer in Korean.")
    system_prefix = """
반드시 한글로 답변할 것.
"""

    # Prepend the prefix to the user-supplied system message
    messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
    # Replay prior turns as alternating user/assistant messages
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    response = ""

    # Stream the completion and yield the accumulated text after each token
    # (the loop variable is renamed to avoid shadowing the `message` parameter)
    for chunk in hf_client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
        yield response
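
# A minimal sketch of exercising respond() outside of Gradio (assumes HF_TOKEN is
# set; the arguments mirror the UI defaults below and the history list is empty):
#
#   for partial in respond("안녕하세요", [], "너는 AI 어시스턴트이다.", 512, 0.7, 0.95):
#       print(partial)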
theme = "Nymbo/Nymbo_Theme"

# Hide the default Gradio footer
css = """
footer {
    visibility: hidden;
}
"""
demo = gr.ChatInterface(
    respond,
    theme=theme,
    css=css,
    additional_inputs=[
        # Default system message: "너는 AI 어시스턴트이다." ("You are an AI assistant.")
        gr.Textbox(value="너는 AI 어시스턴트이다.", label="System prompt"),
        gr.Slider(minimum=1, maximum=2000, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()
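
# Note: demo.launch() serves on http://127.0.0.1:7860 by default; to bind all
# interfaces (e.g. when running inside a container), a sketch would be:
#   demo.launch(server_name="0.0.0.0", server_port=7860)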