File size: 3,486 Bytes
9c880cb
 
5bdf9aa
 
 
9c880cb
6c72519
a5db718
 
 
 
 
 
 
 
 
6c72519
a5db718
 
6c72519
 
a5db718
1e06dbb
5bdf9aa
 
 
 
6c72519
5bdf9aa
 
 
6c72519
a5db718
 
 
 
bf4739d
5bdf9aa
6c72519
9c880cb
1e06dbb
 
 
6c9ad89
1e06dbb
 
6c72519
 
93b41fc
 
 
 
6c72519
93b41fc
 
 
 
 
6c72519
 
 
cbafc8c
6c72519
 
 
 
 
 
cbafc8c
 
 
 
9c880cb
 
5bdf9aa
 
cbafc8c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
from huggingface_hub import InferenceClient
import os

hf_token = os.getenv("HF_TOKEN")

def get_model_response(client, messages, max_tokens, temperature, top_p):
    try:
        response = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True
        )
        for message in response:
            token = message.choices[0].delta.content if hasattr(message.choices[0], 'delta') else message.choices[0].text
            if token:
                yield token
    except Exception as e:
        yield f"모델 추론 실패: {str(e)}"

def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
    try:
        client = InferenceClient(model=selected_model, token=hf_token)
        
        messages = [{"role": "system", "content": system_message}]
        messages.extend([{"role": "user" if i % 2 == 0 else "assistant", "content": m} for h in history for i, m in enumerate(h) if m])
        messages.append({"role": "user", "content": message})
        
        response = ""
        for token in get_model_response(client, messages, max_tokens, temperature, top_p):
            response += token
            yield response
        
        if not response:
            yield "모델이 응답을 생성하지 못했습니다. 다른 입력이나 모델을 시도해보세요."
    except Exception as e:
        yield f"오류 발생: {str(e)}"

models = {
    "deepseek-ai/DeepSeek-Coder-V2-Instruct": "DeepSeek-Coder-V2-Instruct",
    "CohereForAI/c4ai-command-r-plus": "Cohere Command-R Plus",
    "meta-llama/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct"
}

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    
    with gr.Row():
        msg = gr.Textbox(scale=4, label="메시지 입력")
        send = gr.Button("전송", scale=1)

    with gr.Row():
        regenerate = gr.Button("🔄 재생성")
        clear = gr.Button("🗑️ 대화 내역 지우기")

    with gr.Accordion("추가 설정", open=True):
        system_message = gr.Textbox(
            value="너는 나의 최고의 비서이다.\n내가 요구하는것들을 최대한 자세하고 정확하게 답변하라.\n반드시 한글로 답변할것.",
            label="시스템 메시지",
            lines=10
        )
        max_tokens = gr.Slider(minimum=1, maximum=2000, value=500, step=100, label="최대 새 토큰 수")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="온도")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p (핵 샘플링)")
        model = gr.Radio(list(models.keys()), value=list(models.keys())[0], label="언어 모델 선택", info="사용할 언어 모델을 선택하세요")

    send.click(respond, inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, model], outputs=chatbot)
    msg.submit(respond, inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, model], outputs=chatbot)
    regenerate.click(respond, inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, model], outputs=chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    if not hf_token:
        print("경고: HF_TOKEN 환경 변수가 설정되지 않았습니다. 일부 모델에 접근할 수 없을 수 있습니다.")
    demo.launch()