Update app.py
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import time
 import spaces
+import re
 
 # Model configurations
 MODELS = {
@@ -44,10 +45,9 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
     )
     messages.append({"role": "system", "content": system_prompt})
 
-    # Add conversation history
+    # Add conversation history
     for msg in conversation:
-        messages.append({"role": msg["role"], "content": msg["content"]})
+        messages.append(msg)
 
     # Add current user message
     messages.append({"role": "user", "content": user_message})
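Reviewer note on the hunk above: conversation entries are already stored as chat-format dicts, so the old re-wrapping was redundant and `messages.append(msg)` preserves the same structure. A minimal sketch of the list generate_response now builds (the system prompt text and the sample turns are invented placeholders, not the strings in app.py):

# Sketch only: placeholder strings, not the real prompt from app.py
system_prompt = "You are Athena."
conversation = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
]
messages = [{"role": "system", "content": system_prompt}]
for msg in conversation:
    messages.append(msg)  # entries are already {"role": ..., "content": ...} dicts
messages.append({"role": "user", "content": "Tell me a joke"})
# messages now has the shape a chat template (e.g. tokenizer.apply_chat_template) expects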
@@ -76,33 +76,61 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
         outputs[0][inputs['input_ids'].shape[-1]:],
         skip_special_tokens=True
     ).strip()
+    print(f"Generation time: {generation_time:.2f}s")
     return response, load_time, generation_time
 
-def
-    """
+def format_response_with_thinking(response):
+    """Format response to handle <think></think> tags"""
+    # Check if response contains thinking tags
+    if '<think>' in response and '</think>' in response:
+        # Split the response into parts
+        pattern = r'(.*?)(<think>(.*?)</think>)(.*)'
+        match = re.search(pattern, response, re.DOTALL)
+
+        if match:
+            before_thinking = match.group(1).strip()
+            thinking_content = match.group(3).strip()
+            after_thinking = match.group(4).strip()
+
+            # Create HTML with a collapsible thinking section. The click handling
+            # lives in the global js snippet below; an inline onclick here would
+            # fire in addition to that listener and toggle the panel twice.
+            html = f"{before_thinking}\n"
+            html += '<div class="thinking-container">'
+            html += '<button class="thinking-toggle">Show reasoning</button>'
+            html += f'<div class="thinking-content hidden">{thinking_content}</div>'
+            html += '</div>\n'
+            html += after_thinking
+
+            return html
+
+    # If no thinking tags, return the original response
+    return response
+
+def chat_submit(message, chat_history, conversation_state, model_name, max_length, temperature):
+    """Process a new message and update the chat history"""
     if not message.strip():
+        return "", chat_history, conversation_state
+
-    model_id = MODELS.get(model_name, MODELS["Athena-R3X
+    model_id = MODELS.get(model_name, MODELS["Athena-R3X 4B"])
     try:
-        # Format history for Athena
-        formatted_history = []
-        for i in range(0, len(history), 2):
-            if i < len(history):
-                user_msg = history[i][1] if history[i][0] == "user" else ""
-                assistant_msg = history[i+1][1] if i+1 < len(history) and history[i+1][0] == "assistant" else ""
-                if user_msg:
-                    formatted_history.append({"role": "user", "content": user_msg})
-                if assistant_msg:
-                    formatted_history.append({"role": "assistant", "content": assistant_msg})
         response, load_time, generation_time = generate_response(
-            model_id,
+            model_id, conversation_state, message, max_length, temperature
         )
+
+        # Update the conversation state with the raw response
+        conversation_state.append({"role": "user", "content": message})
+        conversation_state.append({"role": "assistant", "content": response})
+
+        # Format the response for display
+        formatted_response = format_response_with_thinking(response)
+
+        # Update the visible chat history
+        chat_history.append((message, formatted_response))
+
+        return "", chat_history, conversation_state
     except Exception as e:
+        error_message = f"Error: {str(e)}"
+        chat_history.append((message, error_message))
+        return "", chat_history, conversation_state
 
 css = """
 .message {
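Reviewer note: the pattern in format_response_with_thinking splits on the first <think>…</think> block only, and re.DOTALL lets the reasoning span multiple lines. A self-contained sketch of that split, run on an invented sample response:

import re

def split_thinking(response):
    # Same lazy pattern as format_response_with_thinking above
    match = re.search(r'(.*?)(<think>(.*?)</think>)(.*)', response, re.DOTALL)
    if not match:
        return response, "", ""
    return match.group(1).strip(), match.group(3).strip(), match.group(4).strip()

before, thinking, after = split_thinking(
    "Sure.<think>The user wants a short answer,\nso keep it brief.</think>The answer is 42."
)
print(before)    # -> Sure.
print(thinking)  # -> The user wants a short answer,\nso keep it brief.
print(after)     # -> The answer is 42.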
@@ -110,22 +138,73 @@ css = """
     margin: 5px;
     border-radius: 10px;
 }
+
+.thinking-container {
+    margin: 10px 0;
+}
+
+.thinking-toggle {
+    background-color: #f1f1f1;
+    border: 1px solid #ddd;
+    border-radius: 4px;
+    padding: 5px 10px;
+    cursor: pointer;
+    font-size: 0.9em;
+    margin-bottom: 5px;
+    color: #555;
+}
+
+.thinking-content {
+    background-color: #f9f9f9;
+    border-left: 3px solid #ccc;
+    padding: 10px;
+    margin-top: 5px;
+    font-size: 0.95em;
+    color: #555;
+    font-family: monospace;
+    white-space: pre-wrap;
+    overflow-x: auto;
+}
+
+.hidden {
+    display: none;
+}
 """
+
+# Add JavaScript to handle the toggle functionality
+js = """
+function setupThinkingToggles() {
+    document.querySelectorAll('.thinking-toggle').forEach(button => {
+        // Guard against re-binding: the MutationObserver below re-runs this
+        // setup on every DOM change, and stacked listeners would toggle the
+        // panel several times per click.
+        if (button.dataset.bound) return;
+        button.dataset.bound = 'true';
+        button.addEventListener('click', function() {
+            const content = this.nextElementSibling;
+            content.classList.toggle('hidden');
+            this.textContent = content.classList.contains('hidden') ? 'Show reasoning' : 'Hide reasoning';
+        });
+    });
+}
+
+// Run after the page loads and when the chat updates
+document.addEventListener('DOMContentLoaded', setupThinkingToggles);
+const observer = new MutationObserver(setupThinkingToggles);
+observer.observe(document.body, { childList: true, subtree: true });
+"""
 
 theme = gr.themes.Monochrome()
 
-with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
+with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme, js=js) as demo:
     gr.Markdown("# 🚀 Athena Playground Chat")
     gr.Markdown("*Powered by HuggingFace ZeroGPU*")
 
+    # State to keep track of the conversation for the model
+    conversation_state = gr.State([])
+
+    chatbot = gr.Chatbot(height=500, label="Athena", render_markdown=True)
+
     with gr.Row():
-        user_input = gr.Textbox(label="Your message", scale=8, autofocus=True)
-        send_btn = gr.Button(value="Send", scale=1)
+        user_input = gr.Textbox(label="Your message", scale=8, autofocus=True, placeholder="Type your message here...")
+        send_btn = gr.Button(value="Send", scale=1, variant="primary")
 
-    #
+    # Configuration controls
     gr.Markdown("### ⚙️ Model & Generation Settings")
     with gr.Row():
         model_choice = gr.Dropdown(
@@ -135,7 +214,7 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
         info="Select which Athena model to use"
     )
     max_length = gr.Slider(
-        32,
+        32, 8000, value=512,
         label="📝 Max Tokens",
         info="Maximum number of tokens to generate"
     )
@@ -145,14 +224,35 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
         info="Higher values = more creative responses"
     )
 
+    # Connect the interface components
+    submit_event = user_input.submit(
+        chat_submit,
+        inputs=[user_input, chatbot, conversation_state, model_choice, max_length, temperature],
+        outputs=[user_input, chatbot, conversation_state]
+    )
+
     send_btn.click(
         chat_submit,
-        inputs=[
-        outputs=[chatbot,
+        inputs=[user_input, chatbot, conversation_state, model_choice, max_length, temperature],
+        outputs=[user_input, chatbot, conversation_state]
     )
 
+    # Add examples if desired
+    gr.Examples(
+        examples=[
+            "What is artificial intelligence?",
+            "Can you explain quantum computing?",
+            "Write a short poem about technology",
+            "What are some ethical concerns about AI?"
+        ],
+        inputs=[user_input]
+    )
+
+    gr.Markdown("""
+    ### About the Thinking Tags
+    Some Athena models (particularly R3X series) include reasoning in `<think></think>` tags.
+    Click "Show reasoning" to see the model's thought process behind its answers.
+    """)
+
 if __name__ == "__main__":
     demo.launch()
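Reviewer note on the wiring: chat_submit returns ("", chat_history, conversation_state), which clears the textbox and keeps the display history (tuples for gr.Chatbot) in lockstep with the raw model-format history held in gr.State. A stubbed round-trip of that contract, with an invented stand-in for the real model call:

# Stubbed round-trip of chat_submit's state contract; no Gradio or model required.
def fake_generate_response(model_id, conversation, user_message, max_length, temperature):
    return "Hello there!", 0.0, 0.0  # invented stand-in for generate_response

chat_history, conversation_state = [], []  # what gr.Chatbot and gr.State hold
message = "Hi"
response, _, _ = fake_generate_response("model-id", conversation_state, message, 512, 0.7)
conversation_state.append({"role": "user", "content": message})        # raw model format
conversation_state.append({"role": "assistant", "content": response})
chat_history.append((message, response))                               # display tuple
assert chat_history[-1][1] == conversation_state[-1]["content"]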