import gradio as gr
from huggingface_hub import InferenceClient
import os
from threading import Event

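# Read the Hugging Face API token from the environment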
hf_token = os.getenv("HF_TOKEN")
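# Event used to signal cancellation of an in-flight generation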
stop_event = Event()

# Available models (repo id -> display name)
models = {
    "deepseek-ai/DeepSeek-Coder-V2-Instruct": "(한국회사)DeepSeek-Coder-V2-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1": "Mixtral-8x7B-Instruct-v0.1",
    "CohereForAI/c4ai-command-r-plus": "Cohere Command-R Plus"
}

# Return an InferenceClient for the selected model
def get_client(model):
    return InferenceClient(model=model, token=hf_token)

# Generate a streaming response to the user's message
def respond(message, system_message, max_tokens, temperature, top_p, selected_model):
    stop_event.clear()
    client = get_client(selected_model)
    
    # Build the conversation; the system message can be set freely
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message}
    ]
    
    try:
        response = ""
        total_tokens_used = 0  # approximate count of generated tokens
        
        # Stream the response from the model, flattening the messages into a plain prompt
        for chunk in client.text_generation(
            prompt="\n".join([f"{m['role']}: {m['content']}" for m in messages]),
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True
        ):
            if stop_event.is_set():
                break
            if chunk:
                response += chunk
                total_tokens_used += len(chunk.split())  # rough estimate: whitespace-split word count per chunk
                yield [(message, response)], f"Tokens used: {total_tokens_used}/{max_tokens}"
        
    except Exception as e:
        yield [(message, f"오류 발생: {str(e)}", "에러 처리 필요")]

# Return the most recent assistant response from the chat history
def get_last_response(chatbot):
    if chatbot and len(chatbot) > 0:
        return chatbot[-1][1]
    return None

# Wrapper that re-yields partial results so the UI updates while the model streams
def gradio_interface(message, system_message, max_tokens, temperature, top_p, selected_model):
    yield from respond(message, system_message, max_tokens, temperature, top_p, selected_model)

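# Build the Gradio UI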
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            selected_model = gr.Dropdown(choices=list(models.keys()), value="deepseek-ai/DeepSeek-Coder-V2-Instruct", label="Model")
            system_message = gr.Textbox(label="System message", value="This message sets the overall direction of the conversation.")
            message = gr.Textbox(label="User message")
            
            max_tokens = gr.Slider(minimum=10, maximum=512, value=128, label="Max tokens")
            temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
            top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top-p")
            
            submit_button = gr.Button("Generate response")
        with gr.Column():
            chatbot = gr.Chatbot()
            token_usage = gr.Textbox(label="Token usage", interactive=False)
    
    # Wire the button to the streaming handler; it updates the chatbot and the token counter
    submit_button.click(gradio_interface, inputs=[message, system_message, max_tokens, temperature, top_p, selected_model], outputs=[chatbot, token_usage])

# Launch the UI
demo.launch()