Spaces:
Runtime error
Runtime error
File size: 2,818 Bytes
47ab986 1f2af73 8381279 47ab986 f18ba88 0b0436e f18ba88 cffea61 2602485 cffea61 8381279 1f2af73 f18ba88 1f2af73 e33536d 8381279 e33536d 1f2af73 8381279 1f2af73 6cdd279 1f2af73 f18ba88 8381279 f18ba88 1f2af73 e33536d cdc1fda e33536d 47ab986 1f2af73 e33536d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import gradio as gr
import requests
import os
import json
from collections import deque
# Read the Hugging Face API token from the environment.
TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")

# Fail fast at import time when the token is missing or empty, rather than
# sending unauthenticated requests later.
if not TOKEN:
    raise ValueError(
        "API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable."
    )

# Bounded conversation log of (user_message, assistant_reply) pairs.
# With maxlen=10, appending to a full deque discards the oldest entry.
memory = deque(maxlen=10)
# Chat-completions endpoint the request is POSTed to.
_CHAT_COMPLETIONS_URL = "https://api-inference.huggingface.co/v1/chat/completions"
# Seconds to wait for the API before giving up instead of blocking forever.
_REQUEST_TIMEOUT_SECONDS = 120


def respond(
    message,
    history: list[tuple[str, str]],
    system_message="λμ μ΄λ¦μ νκΈΈλμ΄λ€",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    """Send ``message`` to the chat-completions API and yield the reply.

    The conversation context is taken from the module-level ``memory`` deque,
    not from ``history``; ``history`` is accepted only so the signature fits
    the chat callback and is never read.

    Args:
        message: The new user message.
        history: Unused (see above).
        system_message: Text of the system prompt; sent prefixed with
            ``"System: "``.
        max_tokens: Forwarded as ``max_tokens`` in the request payload.
        temperature: Forwarded as ``temperature`` in the request payload.
        top_p: Forwarded as ``top_p`` in the request payload.

    Yields:
        The assistant's reply text, exactly once.

    Raises:
        gr.Error: If the request fails, the response is not JSON, the API
            answers with an error status, or the body contains no choices.
    """
    # NOTE(review): the default system_message above looks like mis-encoded
    # (mojibake) text; it is kept byte-for-byte here because the default is
    # part of the interface. Confirm against the original file's encoding.

    # NOTE(review): ``memory`` is module-level, so every caller of this
    # function shares one conversation log. Presumably that means separate
    # chat sessions see each other's turns; confirm whether that is intended.

    # Work on a private copy of the log so that (a) a failed request leaves
    # no half-finished (message, None) entry behind and (b) we never write to
    # ``memory[-1]``, which might by then belong to another call. Appending
    # to the bounded copy evicts the oldest turn exactly as appending to
    # ``memory`` itself would.
    turns = deque(memory, maxlen=memory.maxlen)
    turns.append((message, None))

    messages = [{"role": "system", "content": f"System: {system_message}"}]
    for user_text, assistant_text in turns:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "messages": messages,
    }

    try:
        response = requests.post(
            _CHAT_COMPLETIONS_URL,
            headers=headers,
            json=payload,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        raise gr.Error(f"Request to the inference API failed: {exc}") from exc

    # The payload does not request streaming, so the reply is one JSON
    # document. Parse the complete body once; decoding each network chunk
    # separately breaks whenever the document is split across chunks.
    try:
        response_json = response.json()
    except ValueError as exc:
        raise gr.Error(
            f"Inference API returned a non-JSON response (HTTP {response.status_code})."
        ) from exc

    is_mapping = isinstance(response_json, dict)
    choices = response_json.get("choices") if is_mapping else None
    if not response.ok or not choices:
        detail = response_json.get("error", response_json) if is_mapping else response_json
        raise gr.Error(f"Inference API error (HTTP {response.status_code}): {detail}")

    content = choices[0]["message"]["content"]

    # Record the completed turn only after a successful reply.
    memory.append((message, content))
    yield content
# Theme identifier handed to the chat interface.
theme = "Nymbo/Nymbo_Theme"

# Chat UI backed by respond(). The additional inputs mirror, in order,
# respond's system_message, max_tokens, temperature and top_p parameters
# and use the same default values.
demo = gr.ChatInterface(
    fn=respond,
    theme=theme,
    additional_inputs=[
        gr.Textbox(value="λμ μ΄λ¦μ νκΈΈλμ΄λ€", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        # Fixed: the original read `maximum 4.0` (missing `=`), a SyntaxError
        # that prevented the module from loading.
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    # Only when run as a script: turn on queuing with concurrency_limit=20,
    # then launch the app with max_threads=20.
    queued_app = demo.queue(concurrency_limit=20)
    queued_app.launch(max_threads=20)
|