"""Gradio chat UI that streams responses from HelpingAI/Dhanishtha-2.0-preview.

The model emits ``<think>...</think>`` and ``<ser>...</ser>`` sections in its
output; ``format_thinking_text`` rewrites those sections into labelled blocks
before the text is shown in the chatbot.
"""

import queue
import re
import sys
import threading
import time
from io import StringIO

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Model configuration
model_name = "HelpingAI/Dhanishtha-2.0-preview"

# Global variables for model and tokenizer (populated by load_model()).
model = None
tokenizer = None

# Patterns for the reasoning sections emitted by the model. DOTALL lets a
# section span several lines.
_THINK_BLOCK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
_SER_BLOCK_RE = re.compile(r"<ser>(.*?)</ser>", re.DOTALL)
# Opening/closing tags left over after the complete blocks were replaced
# (e.g. a block that is still being streamed and has no closing tag yet).
_STRAY_THINK_TAG_RE = re.compile(r"</?think>")
_STRAY_SER_TAG_RE = re.compile(r"</?ser>")
# Separator used by structured SER lines: "Key ==> Value".
_SER_SEPARATOR = " ==> "


def load_model():
    """Load the tokenizer and model into the module-level globals."""
    global model, tokenizer

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Ensure pad token is set
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
        trust_remote_code=True,
    )
    print("Model loaded successfully!")


class StreamCapture:
    """File-like sink that collects streamed text chunks.

    Every chunk is appended to ``captured_text`` and also pushed onto
    ``text_queue`` so a consumer thread can read chunks as they arrive.
    """

    def __init__(self):
        self.text_queue = queue.Queue()
        self.captured_text = ""

    def write(self, text):
        """Record ``text`` and return the number of characters accepted.

        Whitespace-only chunks are kept: they carry the spaces and newlines
        between words and paragraphs of the generated answer.
        """
        if text:
            self.captured_text += text
            self.text_queue.put(text)
        return len(text)

    def flush(self):
        """No-op, present for file-object compatibility."""

    def get_text(self):
        """Return all text captured so far."""
        return self.captured_text

    def reset(self):
        """Discard the captured text and drain any queued chunks."""
        self.captured_text = ""
        while True:
            try:
                self.text_queue.get_nowait()
            except queue.Empty:
                break


class _CaptureTextStreamer(TextStreamer):
    """TextStreamer that forwards finalized text to a StreamCapture.

    Overriding ``on_finalized_text`` avoids swapping ``sys.stdout`` for the
    whole process, which would also capture unrelated prints and race with
    other requests.
    """

    def __init__(self, tokenizer, capture, **kwargs):
        super().__init__(tokenizer, **kwargs)
        self._capture = capture

    def on_finalized_text(self, text, stream_end=False):
        """Hand each decoded chunk to the capture instead of printing it."""
        self._capture.write(text)


def _replace_thinking_block(match):
    """Render one ``<think>`` section as a labelled block."""
    thinking_content = match.group(1).strip()
    return f"\n\n🧠 Think\n\n{thinking_content}\n\n"


def _replace_ser_block(match):
    """Render one ``<ser>`` section, formatting ``Key ==> Value`` lines."""
    ser_content = match.group(1).strip()

    formatted_content = []
    for raw_line in ser_content.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if _SER_SEPARATOR in line:
            # maxsplit=1 guarantees exactly two parts once the separator is present.
            key, value = line.split(_SER_SEPARATOR, 1)
            formatted_content.append(f"\n\n{key.strip()}: {value.strip()}\n\n")
        else:
            formatted_content.append(f"\n\n{line}\n\n")

    if not formatted_content:
        formatted_content = [f"\n\n{ser_content}\n\n"]

    content_html = "".join(formatted_content)
    return f"\n\n💜 SER (Structured Emotional Reasoning)\n\n{content_html}\n\n"


def format_thinking_text(text):
    """Format ``<think>`` and ``<ser>`` sections of ``text`` for display.

    Complete sections are replaced by labelled blocks; any remaining raw
    ``<think>``/``<ser>`` tags (for example from a section that is still being
    streamed) are removed. Falsy input is returned unchanged.

    NOTE(review): the block templates contain no HTML wrapper in this source
    even though ``custom_css`` defines ``.thinking-block``/``.ser-block``;
    confirm whether wrapper markup is intended.
    """
    if not text:
        return text

    formatted_text = _THINK_BLOCK_RE.sub(_replace_thinking_block, text)
    formatted_text = _SER_BLOCK_RE.sub(_replace_ser_block, formatted_text)

    # Clean up any remaining raw tags that were not part of a complete block.
    formatted_text = _STRAY_THINK_TAG_RE.sub("", formatted_text)
    formatted_text = _STRAY_SER_TAG_RE.sub("", formatted_text)

    return formatted_text.strip()


@spaces.GPU()
def generate_response(message, history, max_tokens, temperature, top_p):
    """Yield the progressively formatted model response for ``message``.

    Args:
        message: The new user message.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs from earlier turns.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The formatted text generated so far, once per streamed chunk, followed
        by a final yield of the complete text (or a fallback message when
        nothing was generated).
    """
    if model is None or tokenizer is None:
        yield "Model is still loading. Please wait..."
        return

    # Prepare conversation history
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Apply chat template and tokenize
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    stream_capture = StreamCapture()
    # Special tokens are kept so the <think> and <ser> tags survive decoding.
    streamer = _CaptureTextStreamer(
        tokenizer,
        stream_capture,
        skip_prompt=True,
        skip_special_tokens=False,
    )

    generation_kwargs = {
        **model_inputs,
        # Slider values may arrive as floats; generate() expects an integer.
        "max_new_tokens": int(max_tokens),
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": True,
        "pad_token_id": tokenizer.eos_token_id,
        "streamer": streamer,
    }

    def generate():
        """Run generation in the worker thread and always signal completion."""
        try:
            with torch.no_grad():
                model.generate(**generation_kwargs)
        except Exception as e:  # thread boundary: surface the error in the chat
            stream_capture.text_queue.put(f"Error: {str(e)}")
        finally:
            stream_capture.text_queue.put(None)  # Signal end

    thread = threading.Thread(target=generate)
    thread.start()

    # Stream the results with formatting
    generated_text = ""
    while True:
        try:
            new_text = stream_capture.text_queue.get(timeout=30)
        except queue.Empty:
            # No chunk for 30 seconds: stop waiting and emit what we have.
            break
        if new_text is None:
            break
        generated_text += new_text
        yield format_thinking_text(generated_text)

    thread.join(timeout=1)

    # Final yield with complete formatted text
    if generated_text:
        yield format_thinking_text(generated_text)
    else:
        yield "No response generated."


def chat_interface(message, history, max_tokens, temperature, top_p):
    """Append ``message`` to ``history`` and stream the assistant's reply.

    Yields ``(history, "")`` pairs: the updated chat history and an empty
    string that clears the input textbox. A blank message leaves the history
    untouched.
    """
    if not message.strip():
        # This function is a generator, so the result must be yielded; a
        # returned value would be discarded.
        yield history, ""
        return

    # Add user message to history with an empty assistant slot to fill in.
    history.append([message, ""])

    for partial_response in generate_response(
        message, history[:-1], max_tokens, temperature, top_p
    ):
        history[-1][1] = partial_response
        yield history, ""


# Load model on startup
print("Initializing model...")
load_model()

# Custom CSS for modern, professional styling
custom_css = """
/* Import Google Fonts */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');

/* Global styling */
.gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
}

/* Main container styling */
.main {
    background: rgba(255, 255, 255, 0.95);
    backdrop-filter: blur(20px);
    border-radius: 24px;
    box-shadow: 0 20px 40px rgba(0,0,0,0.1);
    margin: 20px;
    padding: 32px;
    border: 1px solid rgba(255, 255, 255, 0.2);
}

/* Header styling */
.gradio-markdown h1 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-weight: 700;
    font-size: 3rem;
    text-align: center;
    margin-bottom: 1rem;
    text-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

.gradio-markdown h3 {
    color: #4a5568;
    font-weight: 600;
    margin-top: 1.5rem;
    margin-bottom: 0.5rem;
}

/* Chatbot styling */
.chatbot {
    font-size: 15px;
    font-family: 'Inter', sans-serif;
    background: #ffffff;
    border-radius: 20px;
    border: 1px solid #e2e8f0;
    box-shadow: 0 8px 32px rgba(0,0,0,0.08);
    overflow: hidden;
}

.chatbot .message {
    padding: 16px 20px;
    margin: 8px 12px;
    border-radius: 16px;
    line-height: 1.6;
    box-shadow: 0 2px 8px rgba(0,0,0,0.06);
    transition: all 0.2s ease;
}

.chatbot .message:hover {
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
}

/* User message styling */
.chatbot .message.user {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    margin-left: 15%;
    border-bottom-right-radius: 6px;
    box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3);
}

/* Assistant message styling */
.chatbot .message.bot {
    background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
    color: #2d3748;
    margin-right: 15%;
    border-bottom-left-radius: 6px;
    border: 1px solid #e2e8f0;
}

/* Enhanced thinking and SER block styling */
.thinking-block, .ser-block {
    border-radius: 12px;
    padding: 16px 20px;
    margin: 16px 0;
    font-family: 'Inter', sans-serif;
    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
    position: relative;
    overflow: hidden;
}

.thinking-block::before, .ser-block::before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    height: 3px;
    background: linear-gradient(90deg, #4a90e2, #357abd);
}

/* Input styling */
.gradio-textbox {
    border-radius: 16px;
    border: 2px solid #e2e8f0;
    transition: all 0.3s ease;
    font-family: 'Inter', sans-serif;
    padding: 16px 20px;
    font-size: 15px;
    background: #ffffff;
    box-shadow: 0 2px 8px rgba(0,0,0,0.04);
}

.gradio-textbox:focus {
    border-color: #667eea;
    box-shadow: 0 0 0 4px rgba(102, 126, 234, 0.1);
    outline: none;
}

/* Button styling */
.gradio-button {
    border-radius: 14px;
    font-weight: 600;
    font-family: 'Inter', sans-serif;
    transition: all 0.3s ease;
    padding: 12px 24px;
    font-size: 14px;
    letter-spacing: 0.5px;
    border: none;
    cursor: pointer;
    position: relative;
    overflow: hidden;
}

.gradio-button.primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3);
}

.gradio-button.primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 8px 24px rgba(102, 126, 234, 0.4);
}

.gradio-button.secondary {
    background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);
    color: #4a5568;
    border: 1px solid #e2e8f0;
}

.gradio-button.secondary:hover {
    background: linear-gradient(135deg, #edf2f7 0%, #e2e8f0 100%);
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
}

/* Slider styling */
.gradio-slider {
    margin: 12px 0;
}

.gradio-slider input[type="range"] {
    -webkit-appearance: none;
    height: 6px;
    border-radius: 3px;
    background: linear-gradient(135deg, #e2e8f0 0%, #cbd5e0 100%);
    outline: none;
}

.gradio-slider input[type="range"]::-webkit-slider-thumb {
    -webkit-appearance: none;
    appearance: none;
    width: 20px;
    height: 20px;
    border-radius: 50%;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    cursor: pointer;
    box-shadow: 0 2px 8px rgba(102, 126, 234, 0.3);
    transition: all 0.2s ease;
}

.gradio-slider input[type="range"]::-webkit-slider-thumb:hover {
    transform: scale(1.1);
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
}

/* Examples styling */
.gradio-examples {
    margin-top: 24px;
    background: rgba(255, 255, 255, 0.7);
    backdrop-filter: blur(10px);
    border-radius: 16px;
    padding: 20px;
    border: 1px solid rgba(255, 255, 255, 0.2);
}

.gradio-examples .gradio-button {
    background: rgba(255, 255, 255, 0.9);
    border: 1px solid #e2e8f0;
    color: #4a5568;
    font-size: 13px;
    padding: 12px 16px;
    margin: 4px;
    border-radius: 12px;
    transition: all 0.2s ease;
    backdrop-filter: blur(10px);
}

.gradio-examples .gradio-button:hover {
    background: rgba(255, 255, 255, 1);
    color: #2d3748;
    transform: translateY(-1px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
}

/* Code block styling */
pre {
    background: linear-gradient(135deg, #2d3748 0%, #4a5568 100%);
    color: #e2e8f0;
    border-radius: 12px;
    padding: 20px;
    overflow-x: auto;
    font-family: 'JetBrains Mono', 'Consolas', 'Monaco', monospace;
    font-size: 14px;
    line-height: 1.5;
    box-shadow: 0 4px 16px rgba(0,0,0,0.1);
    border: 1px solid #4a5568;
}

/* Sidebar styling */
.gradio-column {
    background: rgba(255, 255, 255, 0.8);
    backdrop-filter: blur(10px);
    border-radius: 16px;
    padding: 20px;
    margin: 8px;
    border: 1px solid rgba(255, 255, 255, 0.2);
    box-shadow: 0 4px 16px rgba(0,0,0,0.05);
}

/* Footer styling */
.gradio-markdown hr {
    border: none;
    height: 1px;
    background: linear-gradient(90deg, transparent, #e2e8f0, transparent);
    margin: 2rem 0;
}

/* Responsive design */
@media (max-width: 768px) {
    .main {
        margin: 10px;
        padding: 20px;
        border-radius: 16px;
    }

    .gradio-markdown h1 {
        font-size: 2rem;
    }

    .chatbot .message.user, .chatbot .message.bot {
        margin-left: 5%;
        margin-right: 5%;
    }
}

/* Loading animation */
.loading {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid rgba(102, 126, 234, 0.3);
    border-radius: 50%;
    border-top-color: #667eea;
    animation: spin 1s ease-in-out infinite;
}

@keyframes spin {
    to { transform: rotate(360deg); }
}

/* Scroll styling */
::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background: #f1f1f1;
    border-radius: 4px;
}

::-webkit-scrollbar-thumb {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%);
}
"""

# Create Gradio interface with modern design
with gr.Blocks(
    title="🤖 Dhanishtha-2.0-preview | Advanced Reasoning AI",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        font_mono=gr.themes.GoogleFont("JetBrains Mono"),
    ),
    css=custom_css,
    head="",
) as demo:
    # Header Section
    gr.HTML("""

🤖 Dhanishtha-2.0-preview

Advanced Reasoning AI with Transparent Thinking Process

🧠 Multi-step Reasoning

💜 Emotional Intelligence

🔄 Real-time Streaming

""")

    # Main Chat Interface
    with gr.Row(equal_height=True):
        with gr.Column(scale=4, min_width=600):
            # Chat Area
            with gr.Group():
                chatbot = gr.Chatbot(
                    [],
                    elem_id="chatbot",
                    bubble_full_width=False,
                    height=650,
                    show_copy_button=True,
                    show_share_button=True,
                    # Avatars must be image paths/URLs; an emoji string is not
                    # a file, so the bot side uses no avatar.
                    avatar_images=(
                        "https://raw.githubusercontent.com/gradio-app/gradio/main/gradio/themes/utils/profile_avatar.png",
                        None,
                    ),
                    render_markdown=True,
                    sanitize_html=False,  # Allow HTML for thinking blocks
                    latex_delimiters=[
                        {"left": "$$", "right": "$$", "display": True},
                        {"left": "$", "right": "$", "display": False},
                    ],
                    elem_classes=["modern-chatbot"],
                )

            # Input Section
            with gr.Group():
                with gr.Row():
                    msg = gr.Textbox(
                        container=False,
                        placeholder="💭 Ask me anything! I'll show you my thinking and emotional reasoning process...",
                        label="",
                        autofocus=True,
                        scale=8,
                        lines=1,
                        max_lines=5,
                        elem_classes=["modern-input"],
                    )
                    with gr.Column(scale=1, min_width=120):
                        send_btn = gr.Button(
                            "🚀 Send",
                            variant="primary",
                            size="lg",
                            elem_classes=["send-button"],
                        )
                        clear_btn = gr.Button(
                            "🗑️ Clear",
                            variant="secondary",
                            size="sm",
                            elem_classes=["clear-button"],
                        )

        # Settings Sidebar
        with gr.Column(scale=1, min_width=350):
            with gr.Group():
                gr.HTML("""

⚙️ Generation Settings

""")
                max_tokens = gr.Slider(
                    minimum=1,
                    maximum=40960,
                    value=2048,
                    step=1,
                    label="🎯 Max Tokens",
                    info="Maximum number of tokens to generate",
                    elem_classes=["modern-slider"],
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="🌡️ Temperature",
                    info="Controls randomness in generation",
                    elem_classes=["modern-slider"],
                )
                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="🎲 Top-p (Nucleus Sampling)",
                    info="Controls diversity of generation",
                    elem_classes=["modern-slider"],
                )
                with gr.Row():
                    stop_btn = gr.Button(
                        "⏹️ Stop Generation",
                        variant="stop",
                        size="sm",
                        elem_classes=["stop-button"],
                    )

            # Model Information Panel
            # The tag names are HTML-escaped so the browser shows them as text.
            with gr.Group():
                gr.HTML("""

📊 Model Information

Model: HelpingAI/Dhanishtha-2.0-preview
Type: Advanced Reasoning LLM
Features: Multi-step reasoning, emotional intelligence
Special: Transparent thinking process with &lt;think&gt; and &lt;ser&gt; blocks

""")

            # Performance Stats (placeholder)
            with gr.Group():
                gr.HTML("""

⚡ Performance

Status: Active ✅
Response Mode: Streaming
Reasoning: Enhanced
Context: 8192 tokens

""")

    # Example Prompts Section
    with gr.Group():
        gr.HTML("""

💡 Example Prompts

Try these prompts to see the thinking and emotional reasoning process in action!

""")
        gr.Examples(
            examples=[
                ["Hello! Can you introduce yourself and show me your thinking and emotional reasoning process?"],
                ["Solve this step by step: What is 15% of 240? Show your complete reasoning."],
                ["Explain quantum entanglement in simple terms with your thought process"],
                ["Write a short Python function to find the factorial of a number and explain your approach"],
                ["What are the pros and cons of renewable energy? Include your emotional perspective using SER."],
                ["Help me understand the difference between AI and machine learning with examples"],
                ["Create a haiku about artificial intelligence and explain your creative process"],
                ["Explain why the sky is blue using physics principles with step-by-step thinking"],
                ["What's your favorite type of conversation and why? Show your emotional reasoning using SER format."],
                ["How do you handle complex ethical dilemmas? Walk me through your thinking and emotional process."],
                ["Tell me about a time when you had to change your mind about something. Use both thinking and SER blocks."],
                ["What makes you feel most fulfilled in conversations? Use structured emotional reasoning."],
            ],
            inputs=msg,
            label="",
            examples_per_page=6,
            elem_classes=["modern-examples"],
        )

    # Event handlers
    def clear_chat():
        """Clear the chat history"""
        return [], ""

    # Message submission events
    submit_event = msg.submit(
        chat_interface,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[chatbot, msg],
        concurrency_limit=1,
        show_progress="minimal",
    )

    click_event = send_btn.click(
        chat_interface,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[chatbot, msg],
        concurrency_limit=1,
        show_progress="minimal",
    )

    # Stop button: cancel whichever generation event is currently running.
    stop_btn.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[submit_event, click_event],
    )

    # Clear chat event
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg],
        show_progress=False,
    )

    # Footer Section
    gr.HTML("""

🔧 Technical Specifications

Model: HelpingAI/Dhanishtha-2.0-preview
Framework: Transformers + Gradio

Features: Real-time streaming
Reasoning: Multi-step with transparency

Special Tags: &lt;think&gt; and &lt;ser&gt; blocks
Sampling: Custom temperature & top-p

🚀 Built with ❤️ using Gradio and Transformers | 💡 The first LLM to show transparent thinking and emotional reasoning processes

""")

if __name__ == "__main__":
    # `enable_queue` and `show_tips` are omitted: the Gradio API this file
    # relies on (per-event `concurrency_limit`, `default_concurrency_limit`)
    # no longer accepts them in launch(). `favicon_path` is omitted because an
    # emoji string is not a file path; the default favicon is used instead.
    demo.queue(
        max_size=30,
        default_concurrency_limit=2,
    ).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        quiet=False,
    )