Spaces:

SaisExperiments
/

Sad-Llama-3.2-3B

Sleeping

App Files Files

xet

Community

SaisExperiments committed on Apr 2

Commit

a8e97ac

verified ·

1 Parent(s): 5576714

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -33

app.py CHANGED Viewed

@@ -1,64 +1,146 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
-    message,
     history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
 ):
     messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
-            value=0.95,
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+import os
+# --- Installation Note ---
+# Ensure you have the necessary libraries installed:
+# pip install gradio huggingface_hub
+# --- Hugging Face Hub Token ---
+# The InferenceClient might require a Hugging Face Hub token for certain models or usage.
+# Set it via the HF_TOKEN environment variable (HUGGING_FACE_HUB_TOKEN is the older, legacy name), or log in via `huggingface-cli login`.
+# If the model is public and doesn't require login, this might work without a token.
+# HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN") # Optional: explicitly get token if needed
+client = None
+try:
+    client = InferenceClient(
+        "HuggingFaceH4/zephyr-7b-beta",
+        # token=HUGGING_FACE_HUB_TOKEN # Optional: pass the token explicitly (needs the commented-out os.getenv line above enabled)
+    )
+    print("InferenceClient initialized successfully.")
+except Exception as e:
+    print(f"Error initializing InferenceClient: {e}")
+    print("Please ensure the model identifier is correct and you have necessary permissions/token.")
+    # The error is deliberately not re-raised: `client` stays None so the app still starts.
+    # respond() checks for a None client and reports the failure in the chat instead.
 def respond(
+    message: str,
     history: list[tuple[str, str]],
+    system_message: str = "You are a friendly Chatbot.", # NOTE: differs from the UI textbox default ("You are a friendly and helpful chatbot.")
+    max_tokens: int = 512, # Same value as the UI slider default
+    temperature: float = 0.7, # Same value as the UI slider default
+    top_p: float = 0.95, # Same value as the UI slider default
 ):
+    """
+    Chat response function for the Gradio interface.
+    """
+    # The module-level `client` is None when InferenceClient construction failed at import time.
+    if client is None:
+        yield "Error: InferenceClient could not be initialized. Please check server logs."
+        return # End the generator; nothing can be generated without a client
+    # Reject an empty (falsy) message before any API call is made.
+    if not message:
+        yield "Error: Please enter a message."
+        return
+    if not system_message:
+        system_message = "You are a helpful assistant." # Generic prompt used when the system message is empty
     messages = [{"role": "system", "content": system_message}]
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
+    response_text = ""
+    try:
+        # Request a streamed completion and accumulate its text deltas.
+        for message_chunk in client.chat_completion(
+            messages=messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            # Read the first choice's text delta; no existence check is done here, so this assumes `choices` is non-empty — TODO confirm for every chunk.
+            token = message_chunk.choices[0].delta.content
+            # Skip chunks whose delta carries no text (content is None).
+            if token is not None:
+                response_text += token
+                yield response_text # Yield the full text so far, not just the newest token
+    except Exception as e:
+        print(f"Error during API call: {e}")
+        # Report the failure in the chat; note this exposes the raw exception text to the user.
+        yield f"An error occurred while generating the response: {e}"
+# --- Gradio Interface Definition ---
 demo = gr.ChatInterface(
     respond,
+    chatbot=gr.Chatbot(
+        height=500,
+        label="Zephyr 7B Beta",
+        show_label=True,
+        bubble_full_width=False, # Optional: Adjust bubble width
+    ),
+    title="🤖 Zephyr 7B Beta Chat",
+    description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API. \nEnter your message and adjust settings below.",
+    examples=[
+        ["Hello, how are you today?"],
+        ["What is the capital of France?"],
+        ["Explain the concept of large language models in simple terms."],
+        ["Write a short poem about the rain."]
+    ],
+    cache_examples=False, # Set to True to cache example results if desired
     additional_inputs=[
+        gr.Textbox(
+            value="You are a friendly and helpful chatbot.", # UI default; NOTE: respond()'s own parameter default is "You are a friendly Chatbot."
+            label="System Message",
+            info="The instruction given to the chatbot to guide its behavior.",
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=2048,
+            value=512, # Default max tokens
+            step=1,
+            label="Max New Tokens",
+            info="Maximum number of tokens to generate."
+        ),
+        gr.Slider(
+            minimum=0.1,
+            # Upper bound capped at 1.0 (author's note: values > 1.0 often degrade quality)
+            maximum=1.0,
+            value=0.7, # Default temperature
+            step=0.1,
+            label="Temperature",
+            info="Controls randomness. Lower values make output more focused, higher values make it more diverse."
+        ),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
+            value=0.95, # Default top-p
             step=0.05,
             label="Top-p (nucleus sampling)",
+            info="Considers only the most probable tokens with cumulative probability p. Helps prevent low-probability tokens."
         ),
     ],
+     additional_inputs_accordion_name="⚙️ Advanced Settings" # Label for the grouped settings — NOTE(review): confirm the installed Gradio version still accepts this keyword
 )
 if __name__ == "__main__":
+    # Launch the Gradio app with default settings; every option below is left commented out.
+    demo.launch(
+        # share=True # Uncomment to create a temporary public link (use with caution)
+        # server_name="0.0.0.0" # Uncomment to allow access from your local network
+        # auth=("user", "password") # Optional: add basic authentication (replace the placeholder credentials)
+    )