import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
import re

# Model configuration
model_name = "HelpingAI/Dhanishtha-2.0-preview"

# Global variables for model and tokenizer
model = None
tokenizer = None


def load_model():
    """Load the model and tokenizer"""
    global model, tokenizer

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Ensure pad token is set
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
        trust_remote_code=True
    )
    print("Model loaded successfully!")


def format_thinking_text(text):
    """Format text to properly display <think> and <ser> tags in Gradio with styled borders"""
    if not text:
        return text

    formatted_text = text

    # Handle <think> blocks with blue styling
    thinking_pattern = r'<think>(.*?)</think>'

    def replace_thinking_block(match):
        thinking_content = match.group(1).strip()
        return f'''
<div style="border-left: 4px solid #3b82f6; background: rgba(59, 130, 246, 0.08); border-radius: 8px; padding: 12px 16px; margin: 12px 0;">
<strong>🧠 Think</strong><br>
{thinking_content}
</div>
'''

    # Handle <ser> blocks with green styling
    ser_pattern = r'<ser>(.*?)</ser>'

    def replace_ser_block(match):
        ser_content = match.group(1).strip()
        return f'''
<div style="border-left: 4px solid #10b981; background: rgba(16, 185, 129, 0.08); border-radius: 8px; padding: 12px 16px; margin: 12px 0;">
<strong>💚 Ser</strong><br>
{ser_content}
</div>
'''

    # Apply both patterns
    formatted_text = re.sub(thinking_pattern, replace_thinking_block, formatted_text, flags=re.DOTALL)
    formatted_text = re.sub(ser_pattern, replace_ser_block, formatted_text, flags=re.DOTALL)

    # Clean up any remaining raw tags
    formatted_text = re.sub(r'</?(?:think|ser)>', '', formatted_text)

    return formatted_text.strip()
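# Illustrative usage of the formatter above (hypothetical input; the tag
# names match the regexes in format_thinking_text):
#   format_thinking_text("<think>15% of 240 = 36</think>The answer is 36.")
# returns the reasoning wrapped in a styled <div> block followed by
# "The answer is 36.", with no raw <think>/<ser> tags left in the output.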
@spaces.GPU()
def generate_response(message, history, max_tokens, temperature, top_p):
    """Generate streaming response without threading"""
    global model, tokenizer

    if model is None or tokenizer is None:
        yield "Model is still loading. Please wait..."
        return

    # Prepare conversation history
    messages = []

    # Handle both old tuple format and new message format
    for item in history:
        if isinstance(item, dict):
            # New message format
            messages.append(item)
        elif isinstance(item, (list, tuple)) and len(item) == 2:
            # Old tuple format
            user_msg, assistant_msg = item
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    try:
        with torch.no_grad():
            # Custom token-by-token sampling loop so partial text can be
            # streamed without threading. Note that the full sequence is
            # re-encoded on every step (past_key_values are not reused),
            # which is simple but slower than cached generation.
            generated_text = ""
            current_input_ids = model_inputs["input_ids"]
            current_attention_mask = model_inputs["attention_mask"]

            for _ in range(int(max_tokens)):
                # Generate next-token logits
                outputs = model(
                    input_ids=current_input_ids,
                    attention_mask=current_attention_mask,
                    use_cache=True
                )

                # Get logits for the last position
                logits = outputs.logits[0, -1, :]

                # Apply temperature
                if temperature != 1.0:
                    logits = logits / temperature

                # Apply top-p (nucleus) filtering: drop the low-probability
                # tail once cumulative probability exceeds top_p
                if top_p < 1.0:
                    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
                    sorted_indices_to_remove = cumulative_probs > top_p
                    # Shift right so the first token crossing the threshold is kept
                    sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
                    sorted_indices_to_remove[0] = 0
                    indices_to_remove = sorted_indices[sorted_indices_to_remove]
                    logits[indices_to_remove] = float('-inf')

                # Sample next token
                probs = torch.softmax(logits, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)

                # Check for EOS token
                if next_token.item() == tokenizer.eos_token_id:
                    break

                # Decode the new token (preserve special tokens like <think>)
                new_token_text = tokenizer.decode(next_token, skip_special_tokens=False)
                generated_text += new_token_text

                # Format and yield the current text
                formatted_text = format_thinking_text(generated_text)
                yield formatted_text

                # Update inputs for next iteration
                current_input_ids = torch.cat([current_input_ids, next_token.unsqueeze(0)], dim=-1)
                current_attention_mask = torch.cat(
                    [current_attention_mask,
                     torch.ones((1, 1), dtype=current_attention_mask.dtype, device=model.device)],
                    dim=-1
                )
    except Exception as e:
        yield f"Error generating response: {str(e)}"
        return

    # Final yield with complete formatted text
    final_text = format_thinking_text(generated_text) if generated_text else "No response generated."
    yield final_text


def chat_interface(message, history, max_tokens, temperature, top_p):
    """Main chat interface with improved streaming"""
    if not message.strip():
        # Generators cannot return a value to Gradio, so yield the unchanged state
        yield history, ""
        return

    # Add user message to history in the new message format
    history.append({"role": "user", "content": message})
    # Add placeholder for assistant response
    history.append({"role": "assistant", "content": ""})

    # Generate response with streaming (exclude the two messages just added)
    for partial_response in generate_response(message, history[:-2], max_tokens, temperature, top_p):
        history[-1]["content"] = partial_response
        yield history, ""
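# Illustrative only: generate_response() accepts chat history in either of
# two shapes and normalizes both to the messages format before the chat
# template is applied:
#   [("What is 2+2?", "4")]                      # legacy tuple pairs
#   [{"role": "user", "content": "What is 2+2?"},
#    {"role": "assistant", "content": "4"}]      # messages format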
# Load model on startup
print("Initializing model...")
load_model()

# Advanced CSS for professional UI design
custom_css = """
/* Import Google Fonts */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');

/* Global styling */
* {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
}

/* Main container */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
}

/* Header styling */
.header-container {
    background: rgba(255, 255, 255, 0.95);
    backdrop-filter: blur(20px);
    border-radius: 20px;
    padding: 24px;
    margin-bottom: 24px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

/* Main chat container */
.chat-container {
    background: rgba(255, 255, 255, 0.95);
    backdrop-filter: blur(20px);
    border-radius: 20px;
    padding: 24px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

/* Chatbot styling */
.chatbot {
    font-size: 15px;
    font-family: 'Inter', sans-serif;
    background: transparent;
    border: none;
    border-radius: 16px;
    overflow: hidden;
}

.chatbot .message {
    margin: 12px 0;
    padding: 0;
    background: transparent;
}

.chatbot .message.user {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 18px 18px 4px 18px;
    padding: 16px 20px;
    margin-left: 20%;
    box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3);
}

.chatbot .message.bot {
    background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%);
    color: #2c3e50;
    border-radius: 18px 18px 18px 4px;
    padding: 16px 20px;
    margin-right: 20%;
    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.05);
    border: 1px solid rgba(0, 0, 0, 0.05);
}

/* Input styling */
.input-container {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 25px;
    padding: 8px;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
    border: 2px solid transparent;
    transition: all 0.3s ease;
}

.input-container:focus-within {
    border-color: #667eea;
    box-shadow: 0 4px 20px rgba(102, 126, 234, 0.2);
}

.gradio-textbox {
    border: none !important;
    background: transparent !important;
    font-size: 15px;
    padding: 12px 20px;
    border-radius: 20px;
    font-family: 'Inter', sans-serif;
}

.gradio-textbox:focus {
    outline: none !important;
    box-shadow: none !important;
}

/* Button styling */
.gradio-button {
    border-radius: 20px !important;
    font-weight: 600 !important;
    font-family: 'Inter', sans-serif !important;
    transition: all 0.3s ease !important;
    border: none !important;
    font-size: 14px !important;
    padding: 12px 24px !important;
}

.gradio-button.primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3) !important;
}

.gradio-button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
}

.gradio-button.secondary {
    background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%) !important;
    color: #495057 !important;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1) !important;
}

.gradio-button.secondary:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15) !important;
}

/* Parameter panel styling */
.parameter-panel {
    background: rgba(255, 255, 255, 0.9);
    backdrop-filter: blur(20px);
    border-radius: 20px;
    padding: 24px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

/* Slider styling */
.gradio-slider {
    margin: 16px 0 !important;
}

.gradio-slider .wrap {
    background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%);
    border-radius: 12px;
    padding: 16px;
    border: 1px solid rgba(0, 0, 0, 0.05);
}

/* Examples styling */
.gradio-examples {
    margin-top: 20px;
}

.gradio-examples .gradio-button {
    background: rgba(255, 255, 255, 0.8) !important;
    border: 1px solid rgba(102, 126, 234, 0.2) !important;
    color: #667eea !important;
    font-size: 13px !important;
    padding: 10px 16px !important;
    margin: 4px !important;
    border-radius: 15px !important;
    transition: all 0.2s ease !important;
}

.gradio-examples .gradio-button:hover {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
}

/* Footer styling */
.footer-container {
    background: rgba(255, 255, 255, 0.9);
    backdrop-filter: blur(20px);
    border-radius: 20px;
    padding: 20px;
    margin-top: 24px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(255, 255, 255, 0.2);
    text-align: center;
}

/* Animations */
@keyframes fadeInUp {
    from { opacity: 0; transform: translateY(30px); }
    to { opacity: 1; transform: translateY(0); }
}

@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.5; }
}

@keyframes slideIn {
    from { opacity: 0; transform: translateX(-20px); }
    to { opacity: 1; transform: translateX(0); }
}

.animate-fade-in {
    animation: fadeInUp 0.6s ease-out;
}

.animate-slide-in {
    animation: slideIn 0.4s ease-out;
}

/* Loading states */
.loading {
    position: relative;
    overflow: hidden;
}

.loading::after {
    content: '';
    position: absolute;
    top: 0;
    left: -100%;
    width: 100%;
    height: 100%;
    background: linear-gradient(90deg, transparent, rgba(255,255,255,0.4), transparent);
    animation: shimmer 1.5s infinite;
}

@keyframes shimmer {
    0% { left: -100%; }
    100% { left: 100%; }
}

/* Responsive design */
@media (max-width: 768px) {
    .chat-container, .parameter-panel, .header-container {
        margin: 12px;
        padding: 16px;
        border-radius: 16px;
    }

    .chatbot .message.user {
        margin-left: 10%;
    }

    .chatbot .message.bot {
        margin-right: 10%;
    }
}

/* Dark mode support */
@media (prefers-color-scheme: dark) {
    .gradio-container {
        background: linear-gradient(135deg, #2d3748 0%, #4a5568 100%);
    }

    .chat-container, .parameter-panel, .header-container, .footer-container {
        background: rgba(45, 55, 72, 0.95);
        color: #e2e8f0;
    }

    .chatbot .message.bot {
        background: linear-gradient(135deg, #4a5568 0%, #2d3748 100%);
        color: #e2e8f0;
    }
}
"""

# Create advanced Gradio interface with professional design
with gr.Blocks(
    title="Dhanishtha-2.0-preview | Advanced Reasoning AI",
    theme=gr.themes.Soft(),
    css=custom_css
) as demo:
    # Header Section
    with gr.Row():
        with gr.Column():
            gr.HTML("""
                <div class="header-container animate-fade-in" style="text-align: center;">
                    <h1 style="margin: 0;">🧠 Dhanishtha-2.0-preview</h1>
                    <p style="margin: 8px 0 0 0;">Advanced Reasoning AI with Multi-Step Thinking</p>
                </div>
            """)
    # Feature highlights
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("""
                <div class="header-container" style="text-align: center;">
                    <div style="font-size: 28px;">🧠</div>
                    <strong>Think Blocks</strong>
                    <p style="margin: 4px 0 0 0;">Internal reasoning process</p>
                </div>
            """)
        with gr.Column(scale=1):
            gr.HTML("""
                <div class="header-container" style="text-align: center;">
                    <div style="font-size: 28px;">💚</div>
                    <strong>Ser Blocks</strong>
                    <p style="margin: 4px 0 0 0;">Emotional understanding</p>
                </div>
            """)
        with gr.Column(scale=1):
            gr.HTML("""
                <div class="header-container" style="text-align: center;">
                    <div style="font-size: 28px;">⚡</div>
                    <strong>Real-time</strong>
                    <p style="margin: 4px 0 0 0;">Streaming responses</p>
                </div>
            """)
        with gr.Column(scale=1):
            gr.HTML("""
                <div class="header-container" style="text-align: center;">
                    <div style="font-size: 28px;">🎯</div>
                    <strong>Precise</strong>
                    <p style="margin: 4px 0 0 0;">Step-by-step solutions</p>
                </div>
            """)
    # Main Chat Interface
    with gr.Row():
        with gr.Column(scale=4):
            # Status indicator
            gr.HTML("""
                <div class="chat-container animate-slide-in">
                    <strong>Model Ready</strong>
                    <span style="opacity: 0.75;">HelpingAI/Dhanishtha-2.0-preview</span>
                </div>
            """)

            chatbot = gr.Chatbot(
                [],
                elem_id="chatbot",
                type='messages',
                height=650,
                show_copy_button=True,
                show_share_button=True,
                avatar_images=("👤", "🤖"),
                render_markdown=True,
                sanitize_html=False,  # Allow HTML for thinking and ser blocks
                latex_delimiters=[
                    {"left": "$$", "right": "$$", "display": True},
                    {"left": "$", "right": "$", "display": False}
                ],
                container=True,
                scale=1
            )

            # Enhanced input section
            with gr.Row():
                with gr.Column(scale=1):
                    msg = gr.Textbox(
                        container=False,
                        placeholder="💭 Ask me anything! I'll show you my thinking and reasoning process...",
                        label="",
                        autofocus=True,
                        lines=1,
                        max_lines=4
                    )
                with gr.Column(scale=0, min_width=120):
                    with gr.Row():
                        send_btn = gr.Button(
                            "🚀 Send",
                            variant="primary",
                            size="lg"
                        )
                        clear_btn = gr.Button(
                            "🗑️",
                            variant="secondary",
                            size="lg"
                        )

        with gr.Column(scale=1, min_width=320):
            # Parameter header
            gr.HTML("""
                <div class="parameter-panel" style="text-align: center;">
                    <h3 style="margin: 0;">⚙️ Generation Settings</h3>
                    <p style="margin: 4px 0 0 0;">Fine-tune the AI's responses</p>
                </div>
            """)

            # Enhanced sliders with better styling
            max_tokens = gr.Slider(
                minimum=50,
                maximum=8192,
                value=2048,
                step=50,
                label="🎯 Max Tokens",
                info="Maximum number of tokens to generate"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="🌡️ Temperature",
                info="Higher = more creative, Lower = more focused"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="🎲 Top-p",
                info="Nucleus sampling threshold"
            )

            # Action buttons (note: stop_btn is not wired to a handler below,
            # so generation cannot currently be interrupted)
            with gr.Row():
                stop_btn = gr.Button(
                    "⏹️ Stop",
                    variant="stop",
                    scale=1
                )
            # Model information card
            gr.HTML("""
                <div class="parameter-panel">
                    <h4 style="margin: 0 0 8px 0;">🧠 Model Information</h4>
                    <p style="margin: 4px 0;"><strong>Model:</strong> Dhanishtha-2.0-preview</p>
                    <p style="margin: 4px 0;"><strong>Type:</strong> Reasoning LLM</p>
                    <p style="margin: 4px 0;"><strong>Features:</strong> Think + Ser blocks</p>
                    <p style="margin: 12px 0 0 0;"><strong>💡 Tip</strong><br>
                    Ask complex questions to see multi-step reasoning in action!</p>
                </div>
            """)

    # Enhanced Examples Section
    with gr.Row():
        with gr.Column():
            gr.HTML("""
                <div class="chat-container" style="text-align: center;">
                    <h3 style="margin: 0;">💡 Try These Examples</h3>
                    <p style="margin: 4px 0 0 0;">Click any example to see the AI's thinking process</p>
                </div>
            """)

            gr.Examples(
                examples=[
                    ["🧮 Solve this step by step: What is 15% of 240?"],
                    ["🔤 How many letter 'r' are in the words 'strawberry' and 'raspberry'?"],
                    ["🤖 Hello! Can you introduce yourself and show me how you think?"],
                    ["⚛️ Explain quantum entanglement in simple terms with examples"],
                    ["🐍 Write a Python function to find the factorial of a number"],
                    ["🌱 What are the pros and cons of renewable energy sources?"],
                    ["🧠 What's the difference between AI and machine learning?"],
                    ["🎨 Create a haiku about artificial intelligence and consciousness"],
                    ["🌌 Why is the sky blue? Explain using physics principles"],
                    ["📊 Compare bubble sort and quick sort algorithms"],
                    ["🎯 Plan a 7-day trip to Japan with budget considerations"],
                    ["🔬 How does CRISPR gene editing work in simple terms?"]
                ],
                inputs=msg,
                examples_per_page=6
            )

    # Event handlers
    def clear_chat():
        """Clear the chat history"""
        return [], ""

    # Message submission events
    msg.submit(
        chat_interface,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[chatbot, msg],
        concurrency_limit=1,
        show_progress="minimal"
    )

    send_btn.click(
        chat_interface,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[chatbot, msg],
        concurrency_limit=1,
        show_progress="minimal"
    )

    # Clear chat event
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg],
        show_progress="hidden"
    )
    # Enhanced Footer
    with gr.Row():
        with gr.Column():
            gr.HTML("""
                <div class="footer-container">
                    <h3 style="margin: 0 0 12px 0;">Technical Specifications</h3>
                    <div style="display: flex; flex-wrap: wrap; justify-content: center; gap: 24px;">
                        <div>🧠<br><strong>Model Architecture</strong><br>Transformer-based reasoning</div>
                        <div>⚡<br><strong>Real-time Streaming</strong><br>Token-by-token generation</div>
                        <div>🎯<br><strong>Advanced Reasoning</strong><br>Multi-step thinking process</div>
                        <div>🔧<br><strong>Custom Sampling</strong><br>Temperature &amp; Top-p control</div>
                    </div>
                    <h4 style="margin: 16px 0 4px 0;">🌟 Key Features</h4>
                    <p style="margin: 4px 0;">Think Blocks · Ser Blocks · Real-time Streaming · LaTeX Support · Code Highlighting</p>
                    <p style="margin: 12px 0 0 0;">Built with ❤️ using Gradio and Transformers | Model: HelpingAI/Dhanishtha-2.0-preview</p>
                    <p style="margin: 4px 0 0 0; opacity: 0.8;">Experience the future of AI reasoning with transparent thinking processes</p>
                </div>
            """)

if __name__ == "__main__":
    # Launch with enhanced configuration
    demo.queue(
        max_size=20,
        default_concurrency_limit=1
    ).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        quiet=False
    )