import gradio as gr
from huggingface_hub import InferenceClient
import os
from threading import Event
hf_token = os.getenv("HF_TOKEN")
stop_event = Event()
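# HF_TOKEN must be provided in the environment (e.g., as a Space secret).
# stop_event is polled during streaming so generation can be cancelled,
# though no stop button is wired to it in this UI.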
# Available models: Hugging Face model ID -> display name
models = {
    "deepseek-ai/DeepSeek-Coder-V2-Instruct": "DeepSeek-Coder-V2-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1": "Mixtral-8x7B-Instruct-v0.1",
    "CohereForAI/c4ai-command-r-plus": "Cohere Command-R Plus"
}
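# These IDs target the Hugging Face serverless Inference API; whether a given
# model actually serves requests depends on the account's access and on the
# model being deployed on the API at the time.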
# Return an Inference API client for the selected model
def get_client(model):
    return InferenceClient(model=model, token=hf_token)
# Generate a streamed response
def respond(message, system_message, max_tokens, temperature, top_p, selected_model):
    stop_event.clear()
    client = get_client(selected_model)

    # Build the prompt; the system message can be set freely from the UI
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message}
    ]

    try:
        response = ""
        total_tokens_used = 0  # running estimate of tokens used

        # Stream the response from the model
        for chunk in client.text_generation(
            prompt="\n".join([f"{m['role']}: {m['content']}" for m in messages]),
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True
        ):
            if stop_event.is_set():
                break
            if chunk:
                response += chunk
                # Rough estimate: whitespace-split word count, not the tokenizer's count
                total_tokens_used += len(chunk.split())
                # Chatbot expects (user, assistant) pairs; token usage goes to its own output
                yield [(message, response)], f"Tokens used: {total_tokens_used}/{max_tokens}"
    except Exception as e:
        yield [(message, f"Error: {e}")], "Error"
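# Illustration (assumed inputs, not part of the app): with system_message
# "You are a helpful assistant." and message "Hello", the flattened prompt is:
#   system: You are a helpful assistant.
#   user: Hello
# This is plain "role: content" text rather than a model-specific chat
# template, so instruction-tuned models may respond better if a proper
# template is applied instead.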
# Return the previous assistant reply, if any (currently unused helper)
def get_last_response(chatbot):
    if chatbot and len(chatbot) > 0:
        return chatbot[-1][1]
    return None
# Gradio handler: drain the generator and return the final state
def gradio_interface(message, system_message, max_tokens, temperature, top_p, selected_model):
    result = None
    for output in respond(message, system_message, max_tokens, temperature, top_p, selected_model):
        result = output
    return result
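# Note: because gradio_interface consumes the whole generator before
# returning, the UI only shows the final response. Binding respond directly
# to the click event (Gradio supports generator handlers) would stream
# partial output into the Chatbot as it arrives.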
# Build the Gradio UI
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            selected_model = gr.Dropdown(choices=list(models.keys()), value="deepseek-ai/DeepSeek-Coder-V2-Instruct", label="Model")
            system_message = gr.Textbox(label="System message", value="This message sets the direction of the conversation.")
            message = gr.Textbox(label="User message")
            max_tokens = gr.Slider(minimum=10, maximum=512, value=128, label="Max tokens")
            temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
            top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top-p")
            submit_button = gr.Button("Generate response")
        with gr.Column():
            chatbot = gr.Chatbot()
            token_usage = gr.Textbox(label="Token usage", interactive=False)

    # Wire the button to the response handler
    submit_button.click(gradio_interface, inputs=[message, system_message, max_tokens, temperature, top_p, selected_model], outputs=[chatbot, token_usage])

# Launch the UI
demo.launch()
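# To run locally (assuming this file is saved as app.py and gradio plus
# huggingface_hub are installed):
#   HF_TOKEN=<your token> python app.py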