Tamim3 committed on
Commit 4c033fe · verified · 1 Parent(s): fae165e

Update app.py

Files changed (1):
  1. app.py (+194 -29)
app.py CHANGED
@@ -1,10 +1,16 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from openai import OpenAI
+import os
+
+ACCESS_TOKEN = os.getenv("HF_TOKEN")
+print("Access token loaded.")
+
+client = OpenAI(
+    base_url="https://api-inference.huggingface.co/v1/",
+    api_key=ACCESS_TOKEN,
+)
+print("OpenAI client initialized.")
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
 
 def respond(
     message,
@@ -13,51 +19,210 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    frequency_penalty,
+    seed,
+    custom_model
 ):
+
+    print(f"Received message: {message}")
+    print(f"History: {history}")
+    print(f"System message: {system_message}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Selected model (custom_model): {custom_model}")
+
+    # Convert seed to None if -1 (meaning random)
+    if seed == -1:
+        seed = None
+
     messages = [{"role": "system", "content": system_message}]
+    print("Initial messages array constructed.")
 
+    # Add conversation history to the context
     for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+        user_part = val[0]
+        assistant_part = val[1]
+        if user_part:
+            messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
+        if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
 
+    # Append the latest user message
     messages.append({"role": "user", "content": message})
+    print("Latest user message appended.")
+
+    # If user provided a model, use that; otherwise, fall back to a default model
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Model selected for inference: {model_to_use}")
 
+    # Start with an empty string to build the response as tokens stream in
     response = ""
+    print("Sending request to OpenAI API.")
 
-    for message in client.chat_completion(
-        messages,
+    for message_chunk in client.chat.completions.create(
+        model=model_to_use,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
    ):
-        token = message.choices[0].delta.content
-
-        response += token
+        token_text = message_chunk.choices[0].delta.content
+        print(f"Received token: {token_text}")
+        response += token_text
         yield response
 
+    print("Completed response generation.")
+
+# GRADIO UI
+
+chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
+print("Chatbot interface created.")
+
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
+
+max_tokens_slider = gr.Slider(
+    minimum=1,
+    maximum=4096,
+    value=512,
+    step=1,
+    label="Max new tokens"
+)
+temperature_slider = gr.Slider(
+    minimum=0.1,
+    maximum=4.0,
+    value=0.7,
+    step=0.1,
+    label="Temperature"
+)
+top_p_slider = gr.Slider(
+    minimum=0.1,
+    maximum=1.0,
+    value=0.95,
+    step=0.05,
+    label="Top-P"
+)
+frequency_penalty_slider = gr.Slider(
+    minimum=-2.0,
+    maximum=2.0,
+    value=0.0,
+    step=0.1,
+    label="Frequency Penalty"
+)
+seed_slider = gr.Slider(
+    minimum=-1,
+    maximum=65535,
+    value=-1,
+    step=1,
+    label="Seed (-1 for random)"
+)
+
+# The custom_model_box is what the respond function sees as "custom_model"
+custom_model_box = gr.Textbox(
+    value="",
+    label="Custom Model",
+    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+    placeholder="meta-llama/Llama-3.3-70B-Instruct"
+)
+
+def set_custom_model_from_radio(selected):
+    """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
+    """
+    print(f"Featured model selected: {selected}")
+    return selected
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        system_message_box,
+        max_tokens_slider,
+        temperature_slider,
+        top_p_slider,
+        frequency_penalty_slider,
+        seed_slider,
+        custom_model_box,
     ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
 )
+print("ChatInterface object created.")
+
+with demo:
+    with gr.Accordion("Model Selection", open=False):
+        model_search_box = gr.Textbox(
+            label="Filter Models",
+            placeholder="Search for a featured model...",
+            lines=1
+        )
+        print("Model search box created.")
+
+        models_list = [
+            "meta-llama/Llama-3.3-70B-Instruct",
+            "meta-llama/Llama-3.1-70B-Instruct",
+            "meta-llama/Llama-3.0-70B-Instruct",
+            "meta-llama/Llama-3.2-3B-Instruct",
+            "meta-llama/Llama-3.2-1B-Instruct",
+            "meta-llama/Llama-3.1-8B-Instruct",
+            "NousResearch/Hermes-3-Llama-3.1-8B",
+            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+            "mistralai/Mistral-Nemo-Instruct-2407",
+            "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "mistralai/Mistral-7B-Instruct-v0.2",
+            "Qwen/Qwen2.5-72B-Instruct",
+            "Qwen/Qwen2.5-3B-Instruct",
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            "Qwen/QwQ-32B-Preview",
+            "Qwen/Qwen2.5-Coder-32B-Instruct",
+            "microsoft/Phi-3.5-mini-instruct",
+            "microsoft/Phi-3-mini-128k-instruct",
+            "microsoft/Phi-3-mini-4k-instruct",
+            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+            "HuggingFaceH4/zephyr-7b-beta",
+            "HuggingFaceTB/SmolLM2-360M-Instruct",
+            "tiiuae/falcon-7b-instruct",
+            "01-ai/Yi-1.5-34B-Chat",
+        ]
+        print("Models list initialized.")
+
+        featured_model_radio = gr.Radio(
+            label="Select a model below",
+            choices=models_list,
+            value="meta-llama/Llama-3.3-70B-Instruct",
+            interactive=True
+        )
+        print("Featured models radio button created.")
+
+        def filter_models(search_term):
+            print(f"Filtering models with search term: {search_term}")
+            filtered = [m for m in models_list if search_term.lower() in m.lower()]
+            print(f"Filtered models: {filtered}")
+            return gr.update(choices=filtered)
+
+        model_search_box.change(
+            fn=filter_models,
+            inputs=model_search_box,
+            outputs=featured_model_radio
+        )
+        print("Model search box change event linked.")
+
+        featured_model_radio.change(
+            fn=set_custom_model_from_radio,
+            inputs=featured_model_radio,
+            outputs=custom_model_box
+        )
+        print("Featured model radio button change event linked.")
 
+    print("Gradio interface initialized.")
 
 if __name__ == "__main__":
-    demo.launch()
+    print("Launching the demo application.")
+    demo.launch()
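
Note on the streaming pattern this commit adopts: the old version called client.chat_completion(...) from huggingface_hub, while the new version streams through the openai client pointed at Hugging Face's OpenAI-compatible endpoint, with respond() yielding the growing response string so gr.ChatInterface can render it progressively. Below is a minimal standalone sketch of that loop, not part of the commit: it assumes HF_TOKEN is set in the environment and that the default model is served by the serverless endpoint, and it adds a None guard that the committed respond() omits (delta.content can be None on some stream chunks, which would make response += token_text raise a TypeError).

import os
from openai import OpenAI

# OpenAI-compatible client against the Hugging Face serverless Inference API,
# constructed the same way as in app.py.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

response = ""
# Stream a short completion chunk by chunk, mirroring respond() in app.py.
for chunk in client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # app.py's fallback default
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=64,
    stream=True,
):
    token = chunk.choices[0].delta.content
    if token is not None:  # some chunks carry no text (e.g. role-only or final chunk)
        response += token
print(response)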