Abhaykoul committed on
Commit
8e5bed7
·
verified ·
1 Parent(s): 04e28a8

Update app.py

Files changed (1)
  1. app.py +262 -643
app.py CHANGED
@@ -1,12 +1,7 @@
  import gradio as gr
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
- import threading
- import queue
- import time
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  import spaces
- import sys
- from io import StringIO
  import re

  # Model configuration
@@ -37,42 +32,12 @@ def load_model():

      print("Model loaded successfully!")

- class StreamCapture:
-     """Capture streaming output from TextStreamer"""
-     def __init__(self):
-         self.text_queue = queue.Queue()
-         self.captured_text = ""
-
-     def write(self, text):
-         """Capture written text"""
-         if text and text.strip():
-             self.captured_text += text
-             self.text_queue.put(text)
-         return len(text)
-
-     def flush(self):
-         """Flush method for compatibility"""
-         pass
-
-     def get_text(self):
-         """Get all captured text"""
-         return self.captured_text
-
-     def reset(self):
-         """Reset the capture"""
-         self.captured_text = ""
-         while not self.text_queue.empty():
-             try:
-                 self.text_queue.get_nowait()
-             except queue.Empty:
-                 break
-
  def format_thinking_text(text):
-     """Format text to properly display <think> and <ser> tags in Gradio with styled borders"""
+     """Format text to properly display <think> tags in Gradio with blue border styling like HelpingAI"""
      if not text:
          return text

-     # More sophisticated formatting for thinking and SER blocks
+     # More sophisticated formatting for thinking blocks with blue styling
      formatted_text = text

      # Handle thinking blocks with proper HTML-like styling for Gradio
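The next hunk's context lines apply `thinking_pattern` and `replace_thinking_block`, which live in the unchanged region between these hunks and are therefore not shown in the diff. A minimal sketch of that unchanged machinery, with the inline CSS abbreviated; treat the exact styling here as a reconstruction, not a quote of the file:

```python
import re

# Non-greedy match across newlines (applied with flags=re.DOTALL).
thinking_pattern = r'<think>(.*?)</think>'

def replace_thinking_block(match):
    """Wrap captured reasoning in a styled HTML container (abbreviated sketch)."""
    thinking_content = match.group(1).strip()
    return (
        '\n<div style="border-left: 4px solid #4a90e2; border-radius: 12px; '
        'padding: 16px 20px; margin: 16px 0;">\n'
        '    <div style="color: #4a90e2; font-weight: 600;">🧠 Thinking</div>\n'
        f'    <div>{thinking_content}</div>\n'
        '</div>\n'
    )

# Used exactly as the context line in the hunk shows:
# formatted_text = re.sub(thinking_pattern, replace_thinking_block,
#                         formatted_text, flags=re.DOTALL)
```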
@@ -92,178 +57,115 @@ def format_thinking_text(text):
          </div>
      </div>

-     '''
-
-     # Handle SER blocks with purple/violet styling and structured formatting
-     ser_pattern = r'<ser>(.*?)</ser>'
-
-     def replace_ser_block(match):
-         ser_content = match.group(1).strip()
-
-         # Parse structured SER content if it follows the pattern
-         ser_lines = ser_content.split('\n')
-         formatted_content = []
-
-         for line in ser_lines:
-             line = line.strip()
-             if not line:
-                 continue
-
-             # Check if line has the "Key ==> Value" pattern
-             if ' ==> ' in line:
-                 parts = line.split(' ==> ', 1)
-                 if len(parts) == 2:
-                     key = parts[0].strip()
-                     value = parts[1].strip()
-                     formatted_content.append(f'<div style="margin: 8px 0;"><strong style="color: #8e44ad;">{key}:</strong> <span style="color: #2c3e50;">{value}</span></div>')
-                 else:
-                     formatted_content.append(f'<div style="margin: 4px 0; color: #2c3e50;">{line}</div>')
-             else:
-                 formatted_content.append(f'<div style="margin: 4px 0; color: #2c3e50;">{line}</div>')
-
-         if not formatted_content:
-             formatted_content = [f'<div style="color: #2c3e50; line-height: 1.6;">{ser_content}</div>']
-
-         content_html = ''.join(formatted_content)
-
-         # Use HTML div with inline CSS for purple border styling for SER
-         return f'''
-
- <div style="border-left: 4px solid #8e44ad; background: linear-gradient(135deg, #f8f4ff 0%, #ede7f6 100%); padding: 16px 20px; margin: 16px 0; border-radius: 12px; font-family: 'Segoe UI', sans-serif; box-shadow: 0 2px 8px rgba(142, 68, 173, 0.15); border: 1px solid rgba(142, 68, 173, 0.2);">
-     <div style="color: #8e44ad; font-weight: 600; margin-bottom: 10px; display: flex; align-items: center; font-size: 14px;">
-         <span style="margin-right: 8px;">💜</span> SER (Structured Emotional Reasoning)
-     </div>
-     <div style="line-height: 1.6; font-size: 14px;">
-         {content_html}
-     </div>
- </div>
-
      '''

      formatted_text = re.sub(thinking_pattern, replace_thinking_block, formatted_text, flags=re.DOTALL)
-     formatted_text = re.sub(ser_pattern, replace_ser_block, formatted_text, flags=re.DOTALL)

      # Clean up any remaining raw tags that might not have been caught
      formatted_text = re.sub(r'</?think>', '', formatted_text)
-     formatted_text = re.sub(r'</?ser>', '', formatted_text)

      return formatted_text.strip()

  @spaces.GPU()
  def generate_response(message, history, max_tokens, temperature, top_p):
-     """Generate streaming response with improved TextStreamer"""
+     """Generate streaming response without threading"""
      global model, tokenizer
-
+
      if model is None or tokenizer is None:
          yield "Model is still loading. Please wait..."
          return
-
+
      # Prepare conversation history
      messages = []
      for user_msg, assistant_msg in history:
          messages.append({"role": "user", "content": user_msg})
          if assistant_msg:
              messages.append({"role": "assistant", "content": assistant_msg})
-
+
      # Add current message
      messages.append({"role": "user", "content": message})
-
+
      # Apply chat template
      text = tokenizer.apply_chat_template(
          messages,
          tokenize=False,
          add_generation_prompt=True
      )
-
+
      # Tokenize input
      model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-
-     # Create stream capture
-     stream_capture = StreamCapture()
-
-     # Create TextStreamer with our capture - don't skip special tokens to preserve <think> and <ser>
-     streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False)
-
-     # Temporarily redirect the streamer's output
-     original_stdout = sys.stdout
-
-     # Generation parameters
-     generation_kwargs = {
-         **model_inputs,
-         "max_new_tokens": max_tokens,
-         "temperature": temperature,
-         "top_p": top_p,
-         "do_sample": True,
-         "pad_token_id": tokenizer.eos_token_id,
-         "streamer": streamer,
-     }
-
-     # Start generation in a separate thread
-     def generate():
-         try:
-             # Redirect stdout to capture streamer output
-             sys.stdout = stream_capture
-             with torch.no_grad():
-                 model.generate(**generation_kwargs)
-         except Exception as e:
-             stream_capture.text_queue.put(f"Error: {str(e)}")
-         finally:
-             # Restore stdout
-             sys.stdout = original_stdout
-             stream_capture.text_queue.put(None)  # Signal end
-
-     thread = threading.Thread(target=generate)
-     thread.start()
-
-     # Stream the results with formatting
-     generated_text = ""
-     while True:
-         try:
-             new_text = stream_capture.text_queue.get(timeout=30)
-             if new_text is None:
-                 break
-             generated_text += new_text
-             # Format and yield the current text with <think> and <ser> blocks
-             formatted_text = format_thinking_text(generated_text)
-             yield formatted_text
-         except queue.Empty:
-             break
-
-     thread.join(timeout=1)
+
+     try:
+         with torch.no_grad():
+             # Use transformers streaming with custom approach
+             generated_text = ""
+             current_input_ids = model_inputs["input_ids"]
+             current_attention_mask = model_inputs["attention_mask"]
+
+             for _ in range(max_tokens):
+                 # Generate next token
+                 outputs = model(
+                     input_ids=current_input_ids,
+                     attention_mask=current_attention_mask,
+                     use_cache=True
+                 )
+
+                 # Get logits for the last token
+                 logits = outputs.logits[0, -1, :]
+
+                 # Apply temperature
+                 if temperature != 1.0:
+                     logits = logits / temperature
+
+                 # Apply top-p sampling
+                 if top_p < 1.0:
+                     sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+                     cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
+                     sorted_indices_to_remove = cumulative_probs > top_p
+                     sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
+                     sorted_indices_to_remove[0] = 0
+                     indices_to_remove = sorted_indices[sorted_indices_to_remove]
+                     logits[indices_to_remove] = float('-inf')
+
+                 # Sample next token
+                 probs = torch.softmax(logits, dim=-1)
+                 next_token = torch.multinomial(probs, num_samples=1)
+
+                 # Check for EOS token
+                 if next_token.item() == tokenizer.eos_token_id:
+                     break
+
+                 # Decode the new token (preserve special tokens like <think>)
+                 new_token_text = tokenizer.decode(next_token, skip_special_tokens=False)
+                 generated_text += new_token_text
+
+                 # Format and yield the current text
+                 formatted_text = format_thinking_text(generated_text)
+                 yield formatted_text
+
+                 # Update inputs for next iteration
+                 current_input_ids = torch.cat([current_input_ids, next_token.unsqueeze(0)], dim=-1)
+                 current_attention_mask = torch.cat([current_attention_mask, torch.ones((1, 1), device=model.device)], dim=-1)
+
+     except Exception as e:
+         yield f"Error generating response: {str(e)}"
+         return
+
      # Final yield with complete formatted text
-     if generated_text:
-         final_text = format_thinking_text(generated_text)
-         yield final_text
-     else:
-         yield "No response generated."
+     final_text = format_thinking_text(generated_text) if generated_text else "No response generated."
+     yield final_text

  def chat_interface(message, history, max_tokens, temperature, top_p):
-     """Main chat interface with improved streaming for messages format"""
+     """Main chat interface with improved streaming"""
      if not message.strip():
          return history, ""

-     # Add user message to history (messages format)
-     history.append({"role": "user", "content": message})
+     # Add user message to history
+     history.append([message, ""])

      # Generate response with streaming
-     # Convert messages format to tuples for generate_response compatibility
-     history_tuples = []
-     for i in range(0, len(history) - 1, 2):  # Process pairs
-         user_msg = history[i] if i < len(history) else None
-         assistant_msg = history[i + 1] if i + 1 < len(history) else None
-
-         if user_msg and user_msg.get("role") == "user":
-             user_content = user_msg.get("content", "")
-             assistant_content = assistant_msg.get("content", "") if assistant_msg and assistant_msg.get("role") == "assistant" else ""
-             history_tuples.append([user_content, assistant_content])
-
-     # Add assistant message placeholder
-     history.append({"role": "assistant", "content": ""})
-
-     # Generate response with streaming
-     for partial_response in generate_response(message, history_tuples, max_tokens, temperature, top_p):
-         history[-1]["content"] = partial_response
+     for partial_response in generate_response(message, history[:-1], max_tokens, temperature, top_p):
+         history[-1][1] = partial_response
          yield history, ""

      return history, ""
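The replacement `generate_response` above implements nucleus (top-p) filtering by hand rather than passing `top_p` to `model.generate`. A self-contained sketch of just that filtering step, runnable with plain PyTorch; the toy logits are invented for illustration:

```python
import torch

def top_p_filter(logits: torch.Tensor, top_p: float) -> torch.Tensor:
    """Keep the smallest set of tokens whose cumulative probability exceeds
    top_p; mask the rest, mirroring the logic added in generate_response."""
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
    sorted_indices_to_remove = cumulative_probs > top_p
    # Shift right so the first token that crosses the threshold is kept.
    sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
    sorted_indices_to_remove[0] = False
    filtered = logits.clone()
    filtered[sorted_indices[sorted_indices_to_remove]] = float('-inf')
    return filtered

# Toy example with a 5-token vocabulary.
logits = torch.tensor([2.0, 1.0, 0.5, 0.1, -1.0])
probs = torch.softmax(top_p_filter(logits, top_p=0.9), dim=-1)
next_token = torch.multinomial(probs, num_samples=1)  # sample from the kept mass
```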
@@ -272,499 +174,209 @@ def chat_interface(message, history, max_tokens, temperature, top_p):
  print("Initializing model...")
  load_model()

- # Custom CSS for modern, professional styling
+ # Custom CSS for better styling and thinking blocks
  custom_css = """
- /* Import Google Fonts */
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
-
- /* Global styling */
- .gradio-container {
-     font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     min-height: 100vh;
- }
-
- /* Main container styling */
- .main {
-     background: rgba(255, 255, 255, 0.95);
-     backdrop-filter: blur(20px);
-     border-radius: 24px;
-     box-shadow: 0 20px 40px rgba(0,0,0,0.1);
-     margin: 20px;
-     padding: 32px;
-     border: 1px solid rgba(255, 255, 255, 0.2);
- }
-
- /* Header styling */
- .gradio-markdown h1 {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     -webkit-background-clip: text;
-     -webkit-text-fill-color: transparent;
-     background-clip: text;
-     font-weight: 700;
-     font-size: 3rem;
-     text-align: center;
-     margin-bottom: 1rem;
-     text-shadow: 0 2px 4px rgba(0,0,0,0.1);
- }
-
- .gradio-markdown h3 {
-     color: #4a5568;
-     font-weight: 600;
-     margin-top: 1.5rem;
-     margin-bottom: 0.5rem;
- }
-
- /* Chatbot styling */
+ /* Main chatbot styling */
  .chatbot {
-     font-size: 15px;
-     font-family: 'Inter', sans-serif;
-     background: #ffffff;
-     border-radius: 20px;
-     border: 1px solid #e2e8f0;
-     box-shadow: 0 8px 32px rgba(0,0,0,0.08);
-     overflow: hidden;
- }
-
- .chatbot .message {
-     padding: 16px 20px;
-     margin: 8px 12px;
-     border-radius: 16px;
-     line-height: 1.6;
-     box-shadow: 0 2px 8px rgba(0,0,0,0.06);
-     transition: all 0.2s ease;
+     font-size: 14px;
+     font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  }

- .chatbot .message:hover {
-     transform: translateY(-1px);
-     box-shadow: 0 4px 12px rgba(0,0,0,0.1);
+ /* Enhanced thinking block styling - now handled via inline HTML */
+ .thinking-block {
+     background: linear-gradient(135deg, #f0f8ff 0%, #e6f3ff 100%);
+     border-left: 4px solid #4a90e2;
+     border-radius: 8px;
+     padding: 12px 16px;
+     margin: 12px 0;
+     font-family: 'Segoe UI', sans-serif;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+     position: relative;
  }

- /* User message styling */
- .chatbot .message.user {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     color: white;
-     margin-left: 15%;
-     border-bottom-right-radius: 6px;
-     box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3);
+ /* Support for HTML content in chatbot */
+ .chatbot .message {
+     overflow: visible;
  }

- /* Assistant message styling */
- .chatbot .message.bot {
-     background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
-     color: #2d3748;
-     margin-right: 15%;
-     border-bottom-left-radius: 6px;
-     border: 1px solid #e2e8f0;
+ .chatbot .message div {
+     max-width: none;
  }

- /* Enhanced thinking and SER block styling */
- .thinking-block, .ser-block {
+ /* Message styling */
+ .message {
+     padding: 10px 14px;
+     margin: 6px 0;
      border-radius: 12px;
-     padding: 16px 20px;
-     margin: 16px 0;
-     font-family: 'Inter', sans-serif;
-     box-shadow: 0 4px 12px rgba(0,0,0,0.08);
-     position: relative;
-     overflow: hidden;
+     line-height: 1.5;
  }

- .thinking-block::before, .ser-block::before {
-     content: '';
-     position: absolute;
-     top: 0;
-     left: 0;
-     right: 0;
-     height: 3px;
-     background: linear-gradient(90deg, #4a90e2, #357abd);
+ .user-message {
+     background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
+     margin-left: 15%;
+     border-bottom-right-radius: 4px;
  }

- /* Input styling */
- .gradio-textbox {
-     border-radius: 16px;
-     border: 2px solid #e2e8f0;
-     transition: all 0.3s ease;
-     font-family: 'Inter', sans-serif;
-     padding: 16px 20px;
-     font-size: 15px;
-     background: #ffffff;
-     box-shadow: 0 2px 8px rgba(0,0,0,0.04);
+ .assistant-message {
+     background: linear-gradient(135deg, #f5f5f5 0%, #eeeeee 100%);
+     margin-right: 15%;
+     border-bottom-left-radius: 4px;
  }

- .gradio-textbox:focus {
-     border-color: #667eea;
-     box-shadow: 0 0 0 4px rgba(102, 126, 234, 0.1);
-     outline: none;
+ /* Code block styling */
+ pre {
+     background-color: #f8f9fa;
+     border: 1px solid #e9ecef;
+     border-radius: 6px;
+     padding: 12px;
+     overflow-x: auto;
+     font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
+     font-size: 13px;
+     line-height: 1.4;
  }

  /* Button styling */
  .gradio-button {
-     border-radius: 14px;
-     font-weight: 600;
-     font-family: 'Inter', sans-serif;
-     transition: all 0.3s ease;
-     padding: 12px 24px;
-     font-size: 14px;
-     letter-spacing: 0.5px;
-     border: none;
-     cursor: pointer;
-     position: relative;
-     overflow: hidden;
- }
-
- .gradio-button.primary {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     color: white;
-     box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3);
+     border-radius: 8px;
+     font-weight: 500;
+     transition: all 0.2s ease;
  }

- .gradio-button.primary:hover {
-     transform: translateY(-2px);
-     box-shadow: 0 8px 24px rgba(102, 126, 234, 0.4);
+ .gradio-button:hover {
+     transform: translateY(-1px);
+     box-shadow: 0 4px 8px rgba(0,0,0,0.15);
  }

- .gradio-button.secondary {
-     background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);
-     color: #4a5568;
-     border: 1px solid #e2e8f0;
+ /* Input styling */
+ .gradio-textbox {
+     border-radius: 8px;
+     border: 2px solid #e0e0e0;
+     transition: border-color 0.2s ease;
  }

- .gradio-button.secondary:hover {
-     background: linear-gradient(135deg, #edf2f7 0%, #e2e8f0 100%);
-     transform: translateY(-1px);
-     box-shadow: 0 4px 12px rgba(0,0,0,0.1);
+ .gradio-textbox:focus {
+     border-color: #4a90e2;
+     box-shadow: 0 0 0 3px rgba(74, 144, 226, 0.1);
  }

  /* Slider styling */
  .gradio-slider {
-     margin: 12px 0;
- }
-
- .gradio-slider input[type="range"] {
-     -webkit-appearance: none;
-     height: 6px;
-     border-radius: 3px;
-     background: linear-gradient(135deg, #e2e8f0 0%, #cbd5e0 100%);
-     outline: none;
- }
-
- .gradio-slider input[type="range"]::-webkit-slider-thumb {
-     -webkit-appearance: none;
-     appearance: none;
-     width: 20px;
-     height: 20px;
-     border-radius: 50%;
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     cursor: pointer;
-     box-shadow: 0 2px 8px rgba(102, 126, 234, 0.3);
-     transition: all 0.2s ease;
- }
-
- .gradio-slider input[type="range"]::-webkit-slider-thumb:hover {
-     transform: scale(1.1);
-     box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
+     margin: 8px 0;
  }

  /* Examples styling */
  .gradio-examples {
-     margin-top: 24px;
-     background: rgba(255, 255, 255, 0.7);
-     backdrop-filter: blur(10px);
-     border-radius: 16px;
-     padding: 20px;
-     border: 1px solid rgba(255, 255, 255, 0.2);
+     margin-top: 16px;
  }

  .gradio-examples .gradio-button {
-     background: rgba(255, 255, 255, 0.9);
-     border: 1px solid #e2e8f0;
-     color: #4a5568;
+     background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
+     border: 1px solid #dee2e6;
+     color: #495057;
      font-size: 13px;
-     padding: 12px 16px;
-     margin: 4px;
-     border-radius: 12px;
-     transition: all 0.2s ease;
-     backdrop-filter: blur(10px);
+     padding: 8px 12px;
  }

  .gradio-examples .gradio-button:hover {
-     background: rgba(255, 255, 255, 1);
-     color: #2d3748;
-     transform: translateY(-1px);
-     box-shadow: 0 4px 12px rgba(0,0,0,0.1);
- }
-
- /* Code block styling */
- pre {
-     background: linear-gradient(135deg, #2d3748 0%, #4a5568 100%);
-     color: #e2e8f0;
-     border-radius: 12px;
-     padding: 20px;
-     overflow-x: auto;
-     font-family: 'JetBrains Mono', 'Consolas', 'Monaco', monospace;
-     font-size: 14px;
-     line-height: 1.5;
-     box-shadow: 0 4px 16px rgba(0,0,0,0.1);
-     border: 1px solid #4a5568;
- }
-
- /* Sidebar styling */
- .gradio-column {
-     background: rgba(255, 255, 255, 0.8);
-     backdrop-filter: blur(10px);
-     border-radius: 16px;
-     padding: 20px;
-     margin: 8px;
-     border: 1px solid rgba(255, 255, 255, 0.2);
-     box-shadow: 0 4px 16px rgba(0,0,0,0.05);
- }
-
- /* Footer styling */
- .gradio-markdown hr {
-     border: none;
-     height: 1px;
-     background: linear-gradient(90deg, transparent, #e2e8f0, transparent);
-     margin: 2rem 0;
- }
-
- /* Responsive design */
- @media (max-width: 768px) {
-     .main {
-         margin: 10px;
-         padding: 20px;
-         border-radius: 16px;
-     }
-
-     .gradio-markdown h1 {
-         font-size: 2rem;
-     }
-
-     .chatbot .message.user,
-     .chatbot .message.bot {
-         margin-left: 5%;
-         margin-right: 5%;
-     }
- }
-
- /* Loading animation */
- .loading {
-     display: inline-block;
-     width: 20px;
-     height: 20px;
-     border: 3px solid rgba(102, 126, 234, 0.3);
-     border-radius: 50%;
-     border-top-color: #667eea;
-     animation: spin 1s ease-in-out infinite;
- }
-
- @keyframes spin {
-     to { transform: rotate(360deg); }
- }
-
- /* Scroll styling */
- ::-webkit-scrollbar {
-     width: 8px;
- }
-
- ::-webkit-scrollbar-track {
-     background: #f1f1f1;
-     border-radius: 4px;
- }
-
- ::-webkit-scrollbar-thumb {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     border-radius: 4px;
- }
-
- ::-webkit-scrollbar-thumb:hover {
-     background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%);
+     background: linear-gradient(135deg, #e9ecef 0%, #dee2e6 100%);
+     color: #212529;
  }
  """

- # Create Gradio interface with modern design
+ # Create Gradio interface
  with gr.Blocks(
-     title="🤖 Dhanishtha-2.0-preview | Advanced Reasoning AI",
-     theme=gr.themes.Soft(
-         primary_hue="blue",
-         secondary_hue="purple",
-         neutral_hue="slate",
-         font=gr.themes.GoogleFont("Inter"),
-         font_mono=gr.themes.GoogleFont("JetBrains Mono")
-     ),
-     css=custom_css,
-     head="<link rel='icon' href='🤖' type='image/svg+xml'>"
+     title="🤖 Dhanishtha-2.0-preview Chat",
+     theme=gr.themes.Soft(),
+     css=custom_css
  ) as demo:
-     # Header Section
-     gr.HTML("""
-     <div style="text-align: center; padding: 2rem 0; background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%); border-radius: 20px; margin-bottom: 2rem; border: 1px solid rgba(102, 126, 234, 0.2);">
-         <h1 style="margin: 0; font-size: 3.5rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800;">
-             🤖 Dhanishtha-2.0-preview
-         </h1>
-         <p style="font-size: 1.2rem; color: #64748b; margin: 1rem 0; font-weight: 500;">
-             Advanced Reasoning AI with Transparent Thinking Process
-         </p>
-         <div style="display: flex; justify-content: center; gap: 2rem; flex-wrap: wrap; margin-top: 1.5rem;">
-             <div style="background: rgba(74, 144, 226, 0.1); padding: 0.8rem 1.5rem; border-radius: 12px; border: 1px solid rgba(74, 144, 226, 0.2);">
-                 <span style="color: #4a90e2; font-weight: 600;">🧠 Multi-step Reasoning</span>
-             </div>
-             <div style="background: rgba(142, 68, 173, 0.1); padding: 0.8rem 1.5rem; border-radius: 12px; border: 1px solid rgba(142, 68, 173, 0.2);">
-                 <span style="color: #8e44ad; font-weight: 600;">💜 Emotional Intelligence</span>
-             </div>
-             <div style="background: rgba(34, 197, 94, 0.1); padding: 0.8rem 1.5rem; border-radius: 12px; border: 1px solid rgba(34, 197, 94, 0.2);">
-                 <span style="color: #22c55e; font-weight: 600;">🔄 Real-time Streaming</span>
-             </div>
-         </div>
-     </div>
-     """)
-
-     # Main Chat Interface
-     with gr.Row(equal_height=True):
-         with gr.Column(scale=4, min_width=600):
-             # Chat Area
-             with gr.Group():
-                 chatbot = gr.Chatbot(
-                     [],
-                     elem_id="chatbot",
-                     height=650,
-                     show_copy_button=True,
-                     show_share_button=True,
-                     type='messages',  # Use openai-style messages format
-                     avatar_images=(
-                         "https://raw.githubusercontent.com/gradio-app/gradio/main/gradio/themes/utils/profile_avatar.png",
-                         "🤖"
-                     ),
-                     render_markdown=True,
-                     sanitize_html=False,  # Allow HTML for thinking blocks
-                     latex_delimiters=[
-                         {"left": "$$", "right": "$$", "display": True},
-                         {"left": "$", "right": "$", "display": False}
-                     ]
-                 )
-
-             # Input Section
-             with gr.Group():
-                 with gr.Row():
-                     msg = gr.Textbox(
-                         container=False,
-                         placeholder="💭 Ask me anything! I'll show you my thinking and emotional reasoning process...",
-                         label="",
-                         autofocus=True,
-                         scale=8,
-                         lines=1,
-                         max_lines=5
-                     )
-                     with gr.Column(scale=1, min_width=120):
-                         send_btn = gr.Button(
-                             "🚀 Send",
-                             variant="primary",
-                             size="lg"
-                         )
-                         clear_btn = gr.Button(
-                             "🗑️ Clear",
-                             variant="secondary",
-                             size="sm"
-                         )
-
-         # Settings Sidebar
-         with gr.Column(scale=1, min_width=350):
-             with gr.Group():
-                 gr.HTML("""
-                 <div style="text-align: center; padding: 1rem; background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%); border-radius: 12px; margin-bottom: 1rem;">
-                     <h3 style="margin: 0; color: #667eea; font-weight: 600;">⚙️ Generation Settings</h3>
-                 </div>
-                 """)
-
-                 max_tokens = gr.Slider(
-                     minimum=1,
-                     maximum=40960,
-                     value=2048,
-                     step=1,
-                     label="🎯 Max Tokens",
-                     info="Maximum number of tokens to generate"
-                 )
-
-                 temperature = gr.Slider(
-                     minimum=0.1,
-                     maximum=2.0,
-                     value=0.7,
-                     step=0.1,
-                     label="🌡️ Temperature",
-                     info="Controls randomness in generation"
-                 )
-
-                 top_p = gr.Slider(
-                     minimum=0.1,
-                     maximum=1.0,
-                     value=0.9,
-                     step=0.05,
-                     label="🎲 Top-p (Nucleus Sampling)",
-                     info="Controls diversity of generation"
+     gr.Markdown(
+         """
+         # 🤖 Dhanishtha-2.0-preview Chat
+
+         Chat with the **HelpingAI/Dhanishtha-2.0-preview** model - The world's first LLM designed to think between responses!
+
+         ### ✨ Key Features:
+         - 🧠 **Multi-step Reasoning**: Unlike other LLMs that think once, Dhanishtha can think, rethink, self-evaluate, and refine using multiple `<think>` blocks
+         - 🔄 **Iterative Thinking**: Watch the model's thought process unfold in real-time
+         - 💡 **Enhanced Problem Solving**: Better reasoning capabilities through structured thinking
+
+         **Note**: The `<think>` blocks show the model's internal reasoning process and will be displayed in a formatted way below.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=4):
+             chatbot = gr.Chatbot(
+                 [],
+                 elem_id="chatbot",
+                 bubble_full_width=False,
+                 height=600,
+                 show_copy_button=True,
+                 show_share_button=True,
+                 avatar_images=("👤", "🤖"),
+                 render_markdown=True,
+                 sanitize_html=False,  # Allow HTML for thinking blocks
+                 latex_delimiters=[
+                     {"left": "$$", "right": "$$", "display": True},
+                     {"left": "$", "right": "$", "display": False}
+                 ]
+             )
+
+             with gr.Row():
+                 msg = gr.Textbox(
+                     container=False,
+                     placeholder="Ask me anything! I'll show you my thinking process...",
+                     label="Message",
+                     autofocus=True,
+                     scale=8,
+                     lines=1,
+                     max_lines=5
                  )
-
-     with gr.Row():
-         stop_btn = gr.Button(
-             "⏹️ Stop Generation",
-             variant="stop",
-             size="sm"
-         )
-
-     # Model Information Panel
-     with gr.Group():
-         gr.HTML("""
-         <div style="background: linear-gradient(135deg, rgba(34, 197, 94, 0.1) 0%, rgba(59, 130, 246, 0.1) 100%); border-radius: 12px; padding: 1.5rem; border: 1px solid rgba(34, 197, 94, 0.2);">
-             <h3 style="margin: 0 0 1rem 0; color: #22c55e; font-weight: 600;">📊 Model Information</h3>
-             <div style="color: #64748b; line-height: 1.6;">
-                 <strong style="color: #1e293b;">Model:</strong> HelpingAI/Dhanishtha-2.0-preview<br>
-                 <strong style="color: #1e293b;">Type:</strong> Advanced Reasoning LLM<br>
-                 <strong style="color: #1e293b;">Features:</strong> Multi-step reasoning, emotional intelligence<br>
-                 <strong style="color: #1e293b;">Special:</strong> Transparent thinking process with &lt;think&gt; and &lt;ser&gt; blocks
-             </div>
-         </div>
-         """)
-
-     # Performance Stats (placeholder)
-     with gr.Group():
-         gr.HTML("""
-         <div style="background: linear-gradient(135deg, rgba(168, 85, 247, 0.1) 0%, rgba(236, 72, 153, 0.1) 100%); border-radius: 12px; padding: 1.5rem; border: 1px solid rgba(168, 85, 247, 0.2);">
-             <h3 style="margin: 0 0 1rem 0; color: #a855f7; font-weight: 600;">⚡ Performance</h3>
-             <div style="color: #64748b; line-height: 1.6;">
-                 <strong style="color: #1e293b;">Status:</strong> <span style="color: #22c55e;">Active ✅</span><br>
-                 <strong style="color: #1e293b;">Response Mode:</strong> Streaming<br>
-                 <strong style="color: #1e293b;">Reasoning:</strong> Enhanced<br>
-                 <strong style="color: #1e293b;">Context:</strong> 8192 tokens
-             </div>
-         </div>
-         """)
-
-     # Example Prompts Section
-     with gr.Group():
-         gr.HTML("""
-         <div style="text-align: center; padding: 1.5rem; background: linear-gradient(135deg, rgba(245, 158, 11, 0.1) 0%, rgba(251, 146, 60, 0.1) 100%); border-radius: 16px; margin: 2rem 0; border: 1px solid rgba(245, 158, 11, 0.2);">
-             <h3 style="margin: 0 0 1rem 0; color: #f59e0b; font-weight: 600;">💡 Example Prompts</h3>
-             <p style="color: #64748b; margin: 0;">Try these prompts to see the thinking and emotional reasoning process in action!</p>
-         </div>
-         """)
-
-     gr.Examples(
-         examples=[
-             ["Hello! Can you introduce yourself and show me your thinking and emotional reasoning process?"],
-             ["Solve this step by step: What is 15% of 240? Show your complete reasoning."],
-             ["Explain quantum entanglement in simple terms with your thought process"],
-             ["Write a short Python function to find the factorial of a number and explain your approach"],
-             ["What are the pros and cons of renewable energy? Include your emotional perspective using SER."],
-             ["Help me understand the difference between AI and machine learning with examples"],
-             ["Create a haiku about artificial intelligence and explain your creative process"],
-             ["Explain why the sky is blue using physics principles with step-by-step thinking"],
-             ["What's your favorite type of conversation and why? Show your emotional reasoning using SER format."],
-             ["How do you handle complex ethical dilemmas? Walk me through your thinking and emotional process."],
-             ["Tell me about a time when you had to change your mind about something. Use both thinking and SER blocks."],
-             ["What makes you feel most fulfilled in conversations? Use structured emotional reasoning."]
-         ],
-         inputs=msg,
-         label="",
-         examples_per_page=6
-     )
-
+                 send_btn = gr.Button("🚀 Send", variant="primary", scale=1, size="lg")
+
+         with gr.Column(scale=1, min_width=300):
+             gr.Markdown("### ⚙️ Generation Parameters")
+
+             max_tokens = gr.Slider(
+                 minimum=50,
+                 maximum=8192,
+                 value=2048,
+                 step=50,
+                 label="🎯 Max Tokens",
+                 info="Maximum number of tokens to generate"
+             )
+
+             temperature = gr.Slider(
+                 minimum=0.1,
+                 maximum=2.0,
+                 value=0.7,
+                 step=0.1,
+                 label="🌡️ Temperature",
+                 info="Higher = more creative, Lower = more focused"
+             )
+
+             top_p = gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.9,
+                 step=0.05,
+                 label="🎲 Top-p",
+                 info="Nucleus sampling threshold"
+             )
+
+             with gr.Row():
+                 clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)
+                 stop_btn = gr.Button("⏹️ Stop", variant="stop", scale=1)
+
+             gr.Markdown("### 📊 Model Info")
+             gr.Markdown(
+                 """
+                 **Model**: HelpingAI/Dhanishtha-2.0-preview
+                 **Type**: Reasoning LLM with thinking blocks
+                 **Features**: Multi-step reasoning, self-evaluation
+                 """
+             )

  # Event handlers
  def clear_chat():
      """Clear the chat history"""
@@ -794,43 +406,50 @@ with gr.Blocks(
          show_progress=False
      )

-     # Footer Section
-     gr.HTML("""
-     <div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, rgba(71, 85, 105, 0.1) 0%, rgba(100, 116, 139, 0.1) 100%); border-radius: 16px; margin-top: 2rem; border: 1px solid rgba(71, 85, 105, 0.2);">
-         <h3 style="color: #475569; font-weight: 600; margin-bottom: 1rem;">🔧 Technical Specifications</h3>
-         <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; color: #64748b; line-height: 1.6;">
-             <div>
-                 <strong style="color: #1e293b;">Model:</strong> HelpingAI/Dhanishtha-2.0-preview<br>
-                 <strong style="color: #1e293b;">Framework:</strong> Transformers + Gradio
-             </div>
-             <div>
-                 <strong style="color: #1e293b;">Features:</strong> Real-time streaming<br>
-                 <strong style="color: #1e293b;">Reasoning:</strong> Multi-step with transparency
-             </div>
-             <div>
-                 <strong style="color: #1e293b;">Special Tags:</strong> &lt;think&gt; and &lt;ser&gt; blocks<br>
-                 <strong style="color: #1e293b;">Sampling:</strong> Custom temperature & top-p
-             </div>
-         </div>
-         <hr style="border: none; height: 1px; background: linear-gradient(90deg, transparent, #e2e8f0, transparent); margin: 1.5rem 0;">
-         <p style="color: #64748b; margin: 0; font-size: 14px;">
-             🚀 <strong>Built with ❤️ using Gradio and Transformers</strong> |
-             💡 The first LLM to show transparent thinking and emotional reasoning processes
-         </p>
-     </div>
-     """)
+     # Example prompts section
+     with gr.Row():
+         gr.Examples(
+             examples=[
+                 ["Hello! Can you introduce yourself and show me how you think?"],
+                 ["Solve this step by step: What is 15% of 240?"],
+                 ["Explain quantum entanglement in simple terms"],
+                 ["Write a short Python function to find the factorial of a number"],
+                 ["What are the pros and cons of renewable energy?"],
+                 ["Help me understand the difference between AI and machine learning"],
+                 ["Create a haiku about artificial intelligence"],
+                 ["Explain why the sky is blue using physics principles"]
+             ],
+             inputs=msg,
+             label="💡 Example Prompts - Try these to see the thinking process!",
+             examples_per_page=4
+         )
+
+     # Footer with information
+     gr.Markdown(
+         """
+         ---
+         ### 🔧 Technical Details
+         - **Model**: HelpingAI/Dhanishtha-2.0-preview
+         - **Framework**: Transformers + Gradio
+         - **Features**: Real-time streaming, thinking process visualization, custom sampling
+         - **Reasoning**: Multi-step thinking with `<think>` blocks for transparent AI reasoning
+
+         **Note**: This interface streams responses token by token and formats thinking blocks for better readability.
+         The model's internal reasoning process is displayed in formatted code blocks.
+
+         ---
+         *Built with ❤️ using Gradio and Transformers*
+         """
+     )

  if __name__ == "__main__":
      demo.queue(
-         max_size=30,
-         default_concurrency_limit=2
+         max_size=20,
+         default_concurrency_limit=1
      ).launch(
          server_name="0.0.0.0",
          server_port=7860,
          share=False,
          show_error=True,
-         quiet=False,
-         favicon_path="🤖",
-         show_tips=True,
-         enable_queue=True
+         quiet=False
      )
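One caveat worth noting about the decode loop this commit adds: it passes `use_cache=True` but never feeds `outputs.past_key_values` back into the next forward pass, so every step re-encodes the entire growing prefix. A hedged sketch of the cache-reusing variant; this is not part of the commit, `past_key_values` is the standard `transformers` mechanism, and greedy decoding is used here for brevity:

```python
import torch

def stream_with_kv_cache(model, tokenizer, model_inputs, max_tokens):
    """Illustrative only: incremental decoding that reuses the KV cache,
    so each forward pass sees just the newest token after the first step."""
    input_ids = model_inputs["input_ids"]
    past_key_values = None
    generated = []
    with torch.no_grad():
        for _ in range(max_tokens):
            outputs = model(
                input_ids=input_ids,
                past_key_values=past_key_values,
                use_cache=True,
            )
            past_key_values = outputs.past_key_values  # reuse, don't recompute
            next_token = outputs.logits[0, -1, :].argmax().reshape(1, 1)
            if next_token.item() == tokenizer.eos_token_id:
                break
            generated.append(next_token.item())
            yield tokenizer.decode(generated, skip_special_tokens=False)
            input_ids = next_token  # feed back only the new token
```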
 