Chatbot_Playground_pub

Sleeping

File size: 3,831 Bytes

9c880cb
 
5bdf9aa
 
 
 
9c880cb
a5db718
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e06dbb
5bdf9aa
 
 
 
 
 
 
 
 
 
 
 
a5db718
 
 
 
 
bf4739d
5bdf9aa
 
 
9c880cb
a5db718
1e06dbb
 
 
 
 
 
5bdf9aa
1e06dbb
 
9c880cb
 
 
c2e4a2f
bf4739d
c2e4a2f
1e06dbb
5bdf9aa
1e06dbb
 
9c880cb
 
 
 
5bdf9aa
 
bf4739d

import gradio as gr
from huggingface_hub import InferenceClient
import traceback
import os

# Hugging Face API token, read once at import time; None when HF_TOKEN is unset.
hf_token = os.environ.get("HF_TOKEN")

def get_model_response(client, messages, max_tokens, temperature, top_p, model_name):
    """Stream generated text for ``messages`` from ``client``, piece by piece.

    The chat-completion endpoint is tried first. If it fails *before any text
    was produced*, the conversation is flattened into a ``role: content``
    prompt and retried through the plain text-generation endpoint. If the
    chat stream breaks after text was already produced, no retry happens
    (a retry would append a second, unrelated answer to the partial one);
    the error is reported as text instead.

    Args:
        client: Object exposing ``chat_completion`` and ``text_generation``.
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        max_tokens: Generation budget, forwarded as ``max_tokens`` for chat
            and ``max_new_tokens`` for text generation.
        temperature: Sampling temperature forwarded to the endpoint.
        top_p: Top-p value forwarded to the endpoint.
        model_name: Only used in the failure messages.

    Yields:
        Text fragments from the model, or human-readable error messages when
        inference fails. Endpoint errors are reported as text, not raised.
    """
    emitted = False
    try:
        # Try the chat-completion endpoint first.
        stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        for chunk in stream:
            # Defensive: a chunk without choices would raise IndexError below
            # and be mistaken for an endpoint failure.
            if not chunk.choices:
                continue
            choice = chunk.choices[0]
            if hasattr(choice, "delta"):
                token = choice.delta.content
            else:
                token = choice.text
            if token:
                # Set before yielding: the failure may surface on resumption.
                emitted = True
                yield token
    except Exception as chat_error:
        if emitted:
            # Part of an answer is already on screen; restarting generation
            # would duplicate it, so report the failure and stop.
            yield f"\n모델 {model_name}에 대한 추론 실패:\n"
            yield f"Chat 오류: {str(chat_error)}"
            return

        # Built lazily: the flattened prompt is only needed for the fallback.
        prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
        try:
            # Chat completion failed outright: fall back to text generation.
            fallback_stream = client.text_generation(
                prompt,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                stream=True,
            )
            for token in fallback_stream:
                yield token
        except Exception as text_error:
            # Both endpoints failed: surface both errors to the user.
            yield f"모델 {model_name}에 대한 추론 실패:\n"
            yield f"Chat 오류: {str(chat_error)}\n"
            yield f"Text 오류: {str(text_error)}"

def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Two history layouts are accepted:

    * pair style: each entry is ``(user_text, assistant_text)``; empty or
      ``None`` halves are skipped;
    * message style: each entry is a dict with ``"role"`` and ``"content"``;
      entries whose content is not a non-empty string are skipped.

    ``None`` history is treated as empty.
    """
    converted = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = entry.get("content")
            if role and isinstance(content, str) and content:
                converted.append({"role": role, "content": content})
            continue
        user_text, assistant_text = entry[0], entry[1]
        if user_text:
            converted.append({"role": "user", "content": user_text})
        if assistant_text:
            converted.append({"role": "assistant", "content": assistant_text})
    return converted


def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
    """Chat handler: stream the model's reply to ``message`` as growing text.

    Builds the conversation (system message, prior turns, new user message),
    streams tokens from ``get_model_response`` and yields the accumulated
    reply after every token so the UI can render it incrementally.

    Args:
        message: The new user message.
        history: Previous turns, either ``(user, assistant)`` pairs or
            role/content dicts.
        system_message: Text sent as the system message.
        max_tokens: Generation budget forwarded to the model call.
        temperature: Sampling temperature forwarded to the model call.
        top_p: Top-p value forwarded to the model call.
        selected_model: Model ID used to create the ``InferenceClient``.

    Yields:
        The reply accumulated so far; a fixed notice if the model produced
        nothing; or an error message with traceback if anything raised.
    """
    try:
        client = InferenceClient(model=selected_model, token=hf_token)

        messages = [{"role": "system", "content": system_message}]
        messages.extend(_history_to_messages(history))
        messages.append({"role": "user", "content": message})

        response = ""
        for token in get_model_response(client, messages, max_tokens, temperature, top_p, selected_model):
            response += token
            yield response

        if not response:
            yield "모델이 응답을 생성하지 못했습니다. 다른 입력이나 모델을 시도해보세요."
    except Exception as e:
        # Top-level UI boundary: show the failure in the chat instead of crashing.
        error_msg = f"오류 발생: {str(e)}\n\n상세 오류:\n{traceback.format_exc()}"
        yield error_msg

# Restored original model list: model ID -> human-readable name.
# Only the keys (model IDs) are read below, as the radio choices.
models = {
    "deepseek-ai/DeepSeek-Coder-V2-Instruct": "DeepSeek-Coder-V2-Instruct",
    "CohereForAI/c4ai-command-r-plus": "Cohere Command-R Plus",
    "meta-llama/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct",
    "bartowski/DeepSeek-V2-Chat-0628-GGUF": "DeepSeek-V2-Chat-0628-GGUF",
    "google/gemma-7b": "Gemma-7b",
    "openai-community/gpt2": "GPT-2",
}

# Chat UI. The additional inputs are listed in the same order as the extra
# parameters of `respond`: system message, max tokens, temperature, top-p, model.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            label="시스템 메시지",
            value="""너는 나의 최고의 비서이다.
내가 요구하는것들을 최대한 자세하고 정확하게 답변하라.
반드시 한글로 답변할것.""",
        ),
        gr.Slider(label="최대 새 토큰 수", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="온도", minimum=0.1, maximum=2.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p (핵 샘플링)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
        gr.Radio(
            choices=list(models),
            # Default to the first model in the dict's insertion order.
            value=next(iter(models)),
            label="언어 모델 선택",
            info="사용할 언어 모델을 선택하세요",
        ),
    ],
)

if __name__ == "__main__":
    # A missing token is not fatal: warn and launch anyway.
    missing_token_warning = "경고: HF_TOKEN 환경 변수가 설정되지 않았습니다. 일부 모델에 접근할 수 없을 수 있습니다."
    if not hf_token:
        print(missing_token_warning)
    demo.launch(share=True)