Nymbo committed on
Commit fde397b · verified · 1 Parent(s): f7c4208

Update app.py

Files changed (1)
  1. app.py +118 -96
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from openai import OpenAI
 import os
+import time
 
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -22,6 +23,7 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
+    model_filter,
     model,
     custom_model
 ):
@@ -35,8 +37,9 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - model: the selected model
-    - custom_model: a custom model provided by the user
+    - model_filter: search term to filter available models
+    - model: the selected model from the radio choices
+    - custom_model: manually entered HF model path
     """
 
     print(f"Received message: {message}")
@@ -44,18 +47,12 @@
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Model: {model}, Custom Model: {custom_model}")
+    print(f"Model Filter: {model_filter}, Selected Model: {model}, Custom Model: {custom_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
-    # Use custom model if provided, otherwise use selected model
-    if custom_model.strip() != "":
-        model_to_use = custom_model.strip()
-    else:
-        model_to_use = model
-
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
 
@@ -73,13 +70,30 @@
     # Append the latest user message
     messages.append({"role": "user", "content": message})
 
+    # Determine the model to use
+    # Set the API URL based on the selected model or custom model
+    if custom_model.strip() != "":
+        api_model = custom_model.strip()
+    else:
+        if model == "Llama-3-70B-Instruct":
+            api_model = "meta-llama/Llama-3.3-70B-Instruct"
+        elif model == "Mistral-7B-Instruct-v0.2":
+            api_model = "mistralai/Mistral-7B-Instruct-v0.2"
+        elif model == "OpenHermes-2.5-Mistral-7B":
+            api_model = "teknium/OpenHermes-2.5-Mistral-7B"
+        elif model == "Phi-2":
+            api_model = "microsoft/Phi-2"
+        else:
+            api_model = "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Using model: {api_model}")
+
     # Start with an empty string to build the response as tokens stream in
     response = ""
-    print("Sending request to OpenAI API.")
+    print(f"Sending request to OpenAI API, using model {api_model}.")
 
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,  # Use the selected or custom model
+        model=api_model,
         max_tokens=max_tokens,
         stream=True,  # Stream the response
         temperature=temperature,
@@ -91,36 +105,34 @@
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
-        response += token_text
-        yield response
 
-    print("Completed response generation.")
+        # Check if token_text is None before appending
+        if token_text is not None:
+            response += token_text
+            yield response
 
-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)
-print("Chatbot interface created.")
+    print("Completed response generation.")
 
-# List of placeholder models for demonstration
+# Placeholder list of models for the accordion
 models_list = [
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "meta-llama/Llama-2-70B-chat",
-    "google/flan-t5-xl"
+    "Llama-3-70B-Instruct",
+    "Mistral-7B-Instruct-v0.2",
+    "OpenHermes-2.5-Mistral-7B",
+    "Phi-2",
 ]
 
-# Function to filter models based on search input
-def filter_models(search_term):
-    filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered_models)
+# Create a Chatbot component with a specified height
+chatbot = gr.Chatbot(height=600)
+print("Chatbot interface created.")
 
 # Create the Gradio ChatInterface
-# Adding additional fields for model selection and parameters
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
         gr.Slider(
             minimum=-2.0,
            maximum=2.0,
@@ -130,80 +142,90 @@ demo = gr.ChatInterface(
         ),
         gr.Slider(
             minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
+            maximum=65535,
             value=-1,
             step=1,
             label="Seed (-1 for random)"
         ),
-        gr.Textbox(label="Custom Model", placeholder="Enter custom model path here"),
-        gr.Accordion("Featured Models", open=True).update(
-            gr.Column([
-                gr.Textbox(label="Filter Models", placeholder="Search for a featured model...").change(
-                    filter_models, inputs="__self__", outputs="model"
-                ),
-                gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=models_list, interactive=True, elem_id="model-radio")
-            ])
-        )
+        gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1),
+        gr.Radio(label="Select a Featured Model", choices=models_list, value="Llama-3-70B-Instruct"),
+        gr.Textbox(label="Custom Model", placeholder="Enter Hugging Face model path", lines=1),
     ],
+    additional_inputs_accordion=gr.Accordion("Advanced Parameters", open=False),
     fill_height=True,
     chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
 )
 
-# Adding an "Information" tab with accordions for "Featured Models" and "Parameters Overview"
-with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
-    with gr.Tab("Chat"):
-        gr.Markdown("## Chat with the Model")
-        chatbot.render()
-    with gr.Tab("Information"):
-        with gr.Accordion("Featured Models", open=False):
-            gr.HTML(
-                """
-                <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">See all available models</a></p>
-                <table style="width:100%; text-align:center; margin:auto;">
-                    <tr>
-                        <th>Model Name</th>
-                        <th>Type</th>
-                        <th>Notes</th>
-                    </tr>
-                    <tr>
-                        <td>Llama-3.3-70B-Instruct</td>
-                        <td>Instruction</td>
-                        <td>High performance</td>
-                    </tr>
-                    <tr>
-                        <td>Llama-2-70B-chat</td>
-                        <td>Chat</td>
-                        <td>Conversational</td>
-                    </tr>
-                    <tr>
-                        <td>Flan-T5-XL</td>
-                        <td>General</td>
-                        <td>Versatile</td>
-                    </tr>
-                </table>
-                """
-            )
-        with gr.Accordion("Parameters Overview", open=False):
-            gr.Markdown(
-                """
-                ## Parameters Overview
-                ### Max new tokens
-                This slider controls the maximum number of tokens to generate in the response.
-
-                ### Temperature
-                Sampling temperature, which controls the randomness. A higher temperature makes the output more random.
-
-                ### Top-P
-                Top-p (nucleus) sampling, which controls the diversity. The model considers the smallest number of tokens whose cumulative probability exceeds the top-p threshold.
-
-                ### Frequency Penalty
-                Penalizes repeated tokens in the output, which helps to reduce repetition.
-
-                ### Seed
-                A fixed seed for reproducibility. Set to -1 for random seed.
-                """
-            )
-
-print("Launching the demo application.")
-demo.launch()
+# Add the "Information" tab to the demo
+with gr.Tab("Information", parent=demo):
+    with gr.Accordion("Featured Models", open=True):
+        gr.HTML(
+            """
+            <table style="width:100%; text-align:center; margin:auto;">
+                <tr>
+                    <th>Model Name</th>
+                    <th>Provider</th>
+                    <th>Notes</th>
+                </tr>
+                <tr>
+                    <td>Llama-3-70B-Instruct</td>
+                    <td>Meta</td>
+                    <td>Powerful large language model.</td>
+                </tr>
+                <tr>
+                    <td>Mistral-7B-Instruct-v0.2</td>
+                    <td>Mistral AI</td>
+                    <td>Efficient and versatile model.</td>
+                </tr>
+                <tr>
+                    <td>OpenHermes-2.5-Mistral-7B</td>
+                    <td>Teknium</td>
+                    <td>Community-driven, fine-tuned model.</td>
+                </tr>
+                <tr>
+                    <td>Phi-2</td>
+                    <td>Microsoft</td>
+                    <td>Compact yet powerful model.</td>
+                </tr>
+            </table>
+            """
+        )
+    with gr.Accordion("Parameters Overview", open=False):
+        gr.Markdown(
+            """
+            ## System Message
+            ###### The system message sets the behavior and persona of the chatbot. It's a way to provide context and instructions to the AI. For example, you can tell it to act as a helpful assistant, a storyteller, or any other role.
+            ## Max New Tokens
+            ###### This setting limits the length of the response generated by the AI. A higher number allows for longer, more detailed responses, while a lower number keeps the responses concise.
+            ## Temperature
+            ###### Temperature controls the randomness of the AI's output. A higher temperature makes the responses more creative and varied, while a lower temperature makes them more predictable and focused.
+            ## Top-P (Nucleus Sampling)
+            ###### Top-P sampling is a way to control the diversity of the AI's responses. It sets a threshold for the cumulative probability of the most likely next words. The AI then randomly selects from the words whose probabilities add up to this threshold. A lower Top-P value means less diversity.
+            ## Frequency Penalty
+            ###### Frequency penalty discourages the AI from repeating the same words or phrases too often in its responses. A higher penalty means the AI is less likely to repeat itself.
+            ## Seed
+            ###### The seed is a starting point for the random number generator that influences the AI's responses. If you set a specific seed, you'll get the same response every time you use that seed with the same prompt and settings. If you set it to -1, the AI will generate a new seed each time, leading to different responses.
+            ## Featured Models
+            ###### This section lists pre-selected models that are known to perform well. You can filter the list by typing in the search box.
+            ## Custom Model
+            ###### If you want to use a model that's not in the featured list, you can enter its Hugging Face model path here.
+            ### Feel free to experiment with these settings to see how they affect the AI's responses. Happy chatting!
+            """
+        )
+
+# Filter models function
+def filter_models(search_term, model_radio):
+    filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
+    if not filtered_models:
+        filtered_models = ["No matching models"]  # Provide feedback
+    return gr.Radio.update(choices=filtered_models)
+
+# Update model list when search box is used
+demo.additional_inputs[6].change(filter_models, inputs=[demo.additional_inputs[6], demo.additional_inputs[7]], outputs=demo.additional_inputs[7])
+
+print("Gradio interface initialized.")
+
+if __name__ == "__main__":
+    print("Launching the demo application.")
+    demo.queue().launch()
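
A minimal standalone sketch of the streaming pattern this commit parameterizes, for readers following along. It is illustrative only: the client construction sits outside the visible hunks, so the base_url below is an assumption about how the OpenAI client is pointed at Hugging Face's OpenAI-compatible Inference API, and the model name is simply the fallback default this commit hardcodes.

import os
from openai import OpenAI

# Assumption: the Space constructs its client roughly like this;
# the base_url is not shown anywhere in the diff above.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello."},
]

response = ""
for chunk in client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # the commit's fallback model
    messages=messages,
    max_tokens=64,
    stream=True,
):
    token = chunk.choices[0].delta.content
    # Mirrors the None-check this commit adds: a stream's final chunk
    # can carry delta.content == None, and "str + None" raises TypeError.
    if token is not None:
        response += token
print(response)

That None guard is the behavioral fix in this commit: without it, the final empty delta of a stream crashes the generator mid-response.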