File size: 3,397 Bytes
47ab986
1f2af73
 
8381279
 
47ab986
f18ba88
 
0b0436e
f18ba88
cffea61
2602485
cffea61
8381279
 
 
1f2af73
 
 
f18ba88
 
 
 
1f2af73
8381279
 
 
1f2af73
 
8381279
 
1f2af73
 
 
 
 
 
 
 
 
 
 
6cdd279
1f2af73
 
 
 
 
 
 
 
 
 
 
f18ba88
 
 
 
 
8381279
 
 
f18ba88
1f2af73
 
cdc1fda
8f4901f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47ab986
1f2af73
8f4901f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
import requests
import os
import json
from collections import deque

# Read the Hugging Face Inference API token from the environment.
TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# Fail fast at import time if the token is missing — every request needs it.
if not TOKEN:
    raise ValueError("API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable.")

# Rolling conversation history: keeps at most the 10 most recent
# (user_message, assistant_reply) pairs; older turns are dropped automatically.
# NOTE(review): this is module-level state shared by ALL Gradio sessions —
# confirm a single-user deployment is intended.
memory = deque(maxlen=10)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message="λ„ˆμ˜ 이름은 홍길동이닀",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    """Send the remembered conversation to the HF chat-completions API and yield the reply.

    Args:
        message: The new user message.
        history: Gradio-supplied chat history (unused; the module-level
            ``memory`` deque is the source of truth for context).
        system_message: System prompt placed first in the message list.
        max_tokens: Generation cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Yields:
        The assistant's reply text (single yield; kept as a generator so the
        Gradio wiring is unchanged).

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
    """
    # Record the new user turn now; the assistant reply is filled in below.
    memory.append((message, None))

    messages = [{"role": "system", "content": system_message}]

    # Replay remembered turns so the model sees recent context.
    for user_msg, assistant_msg in memory:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "messages": messages,
    }

    # BUG FIX: the original passed stream=True and ran json.loads() on every
    # raw chunk, but the payload never requested a streamed response — the
    # server returns a single JSON document, and arbitrary chunk boundaries
    # would hand json.loads a partial document and crash. Fetch and parse the
    # response once instead.
    response = requests.post(
        "https://api-inference.huggingface.co/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,  # avoid hanging forever on a stalled connection
    )
    response.raise_for_status()  # surface HTTP errors instead of a decode crash

    response_json = response.json()
    if "choices" in response_json:
        content = response_json["choices"][0]["message"]["content"]
        # Pair the assistant reply with the user turn appended above.
        memory[-1] = (message, content)
        yield content

theme = "Nymbo/Nymbo_Theme"

# Build the two-tab UI: a Playground wired to respond(), and a static Guide.
with gr.Blocks(css=None, theme=theme) as demo:
    with gr.Tab("Playground"):
        gr.Markdown("## Playground")
        with gr.Row():
            input_text = gr.Textbox(label="Enter your text")
            submit_button = gr.Button("Submit")
            output_text = gr.Textbox(label="Processed Text")
        # Parameter controls, rendered below the row (same position as before).
        additional_inputs = [
            gr.Textbox(value="λ„ˆμ˜ 이름은 홍길동이닀", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        ]
        # BUG FIX: these controls were previously created AFTER the click
        # binding and never passed to respond(), so adjusting them had no
        # effect. They are now forwarded as the system_message / max_tokens /
        # temperature / top_p arguments.
        submit_button.click(
            fn=respond,
            # output_text doubles as the (unused) history argument, as before.
            inputs=[input_text, output_text, *additional_inputs],
            outputs=output_text,
        )

    with gr.Tab("Guide"):
        gr.Markdown("## Guide")
        gr.Markdown("""
        ### How to use:
        - Use the Playground tab to interact with the chatbot.
        - Adjust the parameters to see how the model's responses change.
        - Explore different system messages to guide the conversation.
        """)

if __name__ == "__main__":
    demo.queue(concurrency_count=20).launch()