3-1_LLM_PLAYGROUND

Sleeping

App Files Files Community

Kims12 committed on Dec 31, 2024

Commit

7dcc8af

verified ·

1 Parent(s): b34f0d5

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -90

app.py CHANGED Viewed

@@ -1,105 +1,118 @@
 import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-import torch
-# 모델 및 토크나이저 로드
-model_id = "meta-llama/Llama-3.3-70B-Instruct"  # 사용하려는 LLaMA 모델 ID
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    load_in_8bit=False  # 메모리 절약을 위해 8-bit 로드 사용 가능
-)
-# 텍스트 생성 파이프라인 설정
-text_generator = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
-    max_length=2048,  # 필요에 따라 조정
-)
-def generate_response(
-    user_input,
-    system_prompt,
-    max_new_tokens,
     temperature,
-    top_p
 ):
-    """
-    사용자 입력과 옵션을 받아 모델의 응답을 생성하는 함수
-    """
-    # 시스템 프롬프트와 사용자 입력을 결합
-    full_prompt = system_prompt + "\n" + user_input
-    # 텍스트 생성
-    outputs = text_generator(
-        full_prompt,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        eos_token_id=tokenizer.eos_token_id,
-        pad_token_id=tokenizer.eos_token_id,
-    )
-    # 생성된 텍스트 반환
-    return outputs[0]['generated_text'][len(full_prompt):].strip()
-# Gradio 인터페이스 구성
 with gr.Blocks() as demo:
-    gr.Markdown("# LLaMA 기반 대화형 챗봇")
     with gr.Row():
-        with gr.Column():
-            system_prompt = gr.Textbox(
-                label="시스템 프롬프트",
-                value="You are a helpful assistant.",
-                lines=2
-            )
-            user_input = gr.Textbox(
-                label="사용자 입력",
-                placeholder="질문을 입력하세요...",
-                lines=4
             )
-        with gr.Column():
-            max_new_tokens = gr.Slider(
-                label="Max New Tokens",
-                minimum=16,
-                maximum=2048,
-                step=16,
-                value=256
             )
-            temperature = gr.Slider(
-                label="Temperature",
-                minimum=0.1,
-                maximum=1.0,
-                step=0.1,
-                value=0.7
-            )
-            top_p = gr.Slider(
-                label="Top-p (nucleus sampling)",
-                minimum=0.1,
-                maximum=1.0,
-                step=0.1,
-                value=0.9
-            )
-    generate_button = gr.Button("생성")
-    output = gr.Textbox(
-        label="응답",
-        lines=10
-    )
-    # 버튼 클릭 시 응답 생성
-    generate_button.click(
-        fn=generate_response,
-        inputs=[user_input, system_prompt, max_new_tokens, temperature, top_p],
-        outputs=output
-    )
-# Gradio 앱 실행
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from huggingface_hub import InferenceClient
+import os
+# Maps the display name offered in the model selector to the Hugging Face Hub
+# repo ID that get_client() passes to InferenceClient.
+MODELS = {
+    "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
+    "DeepSeek Coder V2": "deepseek-ai/DeepSeek-Coder-V2-Instruct",
+    "Meta Llama 3.1 8B": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "Meta-Llama 3.1 70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    "Microsoft": "microsoft/Phi-3-mini-4k-instruct",
+    # NOTE(review): the label says "Mixtral 8x7B" but the repo ID is
+    # Mistral-7B-Instruct-v0.3 — confirm which model was intended.
+    "Mixtral 8x7B": "mistralai/Mistral-7B-Instruct-v0.3",
+    "Mixtral Nous-Hermes": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    # respond() switches to non-streaming mode for any display name that
+    # contains "Cohere"; "Aya-23-35B" below does not match that check.
+    "Cohere Command R+": "CohereForAI/c4ai-command-r-plus",
+    "Aya-23-35B": "CohereForAI/aya-23-35B"
+}
+def get_client(model_name):
+    model_id = MODELS[model_name]
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise ValueError("HF_TOKEN environment variable is required")
+    return InferenceClient(model_id, token=hf_token)
+def respond(
+    message,
+    chat_history,
+    model_name,
+    max_tokens,
     temperature,
+    top_p,
+    system_message,
 ):
+    try:
+        client = get_client(model_name)
+    except ValueError as e:
+        chat_history.append((message, str(e)))
+        return chat_history
+    messages = [{"role": "system", "content": system_message}]
+    for human, assistant in chat_history:
+        messages.append({"role": "user", "content": human})
+        messages.append({"role": "assistant", "content": assistant})
+    messages.append({"role": "user", "content": message})
+    try:
+        if "Cohere" in model_name:
+            # Cohere 모델을 위한 비스트리밍 처리
+            response = client.chat_completion(
+                messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+            )
+            assistant_message = response.choices[0].message.content
+            chat_history.append((message, assistant_message))
+            yield chat_history
+        else:
+            # 다른 모델들을 위한 스트리밍 처리
+            stream = client.chat_completion(
+                messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                stream=True,
+            )
+            partial_message = ""
+            for response in stream:
+                if response.choices[0].delta.content is not None:
+                    partial_message += response.choices[0].delta.content
+                    if len(chat_history) > 0 and chat_history[-1][0] == message:
+                        chat_history[-1] = (message, partial_message)
+                    else:
+                        chat_history.append((message, partial_message))
+                    yield chat_history
+    except Exception as e:
+        error_message = f"An error occurred: {str(e)}"
+        chat_history.append((message, error_message))
+        yield chat_history
+def clear_conversation():
+    return []
 with gr.Blocks() as demo:
+    # Page header: title plus a one-line description of the app.
+    gr.Markdown("# Prompting AI Chatbot")
+    gr.Markdown("언어모델별 프롬프트 테스트 챗봇입니다.")
     with gr.Row():
+        # Left column: model selection and generation settings.
+        with gr.Column(scale=1):
+            model_name = gr.Radio(
+                choices=list(MODELS.keys()),
+                label="Language Model",
+                value="Zephyr 7B Beta"
             )
+            # NOTE(review): minimum=0 lets the user request 0 tokens —
+            # confirm the backend accepts that value.
+            max_tokens = gr.Slider(minimum=0, maximum=2000, value=500, step=100, label="Max Tokens")
+            temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature")
+            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
+            # Default system prompt (Korean); respond() sends it as the
+            # "system" message of every request.
+            system_message = gr.Textbox(
+                value="""반드시 한글로 답변할 것.
+너는 최고의 비서이다.
+내가 요구하는것들을 최대한 자세하고 정확하게 답변하라.
+""",
+                label="System Message",
+                lines=3
             )
+        # Right column: conversation view, message box, and action buttons.
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot()
+            msg = gr.Textbox(label="메세지를 입력하세요")
+            with gr.Row():
+                submit_button = gr.Button("전송")
+                clear_button = gr.Button("대화 내역 지우기")
+    # Submitting the textbox and clicking the send button both call respond()
+    # with the same inputs and write its output to the chatbot.
+    msg.submit(respond, [msg, chatbot, model_name, max_tokens, temperature, top_p, system_message], chatbot)
+    submit_button.click(respond, [msg, chatbot, model_name, max_tokens, temperature, top_p, system_message], chatbot)
+    # The clear button replaces the chatbot contents with an empty history.
+    clear_button.click(clear_conversation, outputs=chatbot, queue=False)
 if __name__ == "__main__":
+    demo.launch()