Abhaykoul committed on
Commit
8e5bed7
·
verified ·
1 Parent(s): 04e28a8

Update app.py

Files changed (1)
  1. app.py +262 -643
app.py CHANGED
@@ -1,12 +1,7 @@
  import gradio as gr
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
- import threading
- import queue
- import time
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  import spaces
- import sys
- from io import StringIO
  import re

  # Model configuration
@@ -37,42 +32,12 @@ def load_model():

      print("Model loaded successfully!")

- class StreamCapture:
-     """Capture streaming output from TextStreamer"""
-     def __init__(self):
-         self.text_queue = queue.Queue()
-         self.captured_text = ""
-
-     def write(self, text):
-         """Capture written text"""
-         if text and text.strip():
-             self.captured_text += text
-             self.text_queue.put(text)
-         return len(text)
-
-     def flush(self):
-         """Flush method for compatibility"""
-         pass
-
-     def get_text(self):
-         """Get all captured text"""
-         return self.captured_text
-
-     def reset(self):
-         """Reset the capture"""
-         self.captured_text = ""
-         while not self.text_queue.empty():
-             try:
-                 self.text_queue.get_nowait()
-             except queue.Empty:
-                 break
-
  def format_thinking_text(text):
-     """Format text to properly display <think> and <ser> tags in Gradio with styled borders"""
+     """Format text to properly display <think> tags in Gradio with blue border styling like HelpingAI"""
      if not text:
          return text

-     # More sophisticated formatting for thinking and SER blocks
+     # More sophisticated formatting for thinking blocks with blue styling
      formatted_text = text

      # Handle thinking blocks with proper HTML-like styling for Gradio
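The next hunk's context lines apply `thinking_pattern` and `replace_thinking_block`, which live in the unchanged region between these hunks and are therefore not shown in the diff. A minimal sketch of that unchanged machinery, with the inline CSS abbreviated; treat the exact styling here as a reconstruction, not a quote of the file:

```python
import re

# Non-greedy match across newlines (applied with flags=re.DOTALL).
thinking_pattern = r'<think>(.*?)</think>'

def replace_thinking_block(match):
    """Wrap captured reasoning in a styled HTML container (abbreviated sketch)."""
    thinking_content = match.group(1).strip()
    return (
        '\n<div style="border-left: 4px solid #4a90e2; border-radius: 12px; '
        'padding: 16px 20px; margin: 16px 0;">\n'
        '    <div style="color: #4a90e2; font-weight: 600;">🧠 Thinking</div>\n'
        f'    <div>{thinking_content}</div>\n'
        '</div>\n'
    )

# Used exactly as the context line in the hunk shows:
# formatted_text = re.sub(thinking_pattern, replace_thinking_block,
#                         formatted_text, flags=re.DOTALL)
```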
@@ -92,178 +57,115 @@ def format_thinking_text(text):
          </div>
      </div>

-     '''
-
-     # Handle SER blocks with purple/violet styling and structured formatting
-     ser_pattern = r'<ser>(.*?)</ser>'
-
-     def replace_ser_block(match):
-         ser_content = match.group(1).strip()
-
-         # Parse structured SER content if it follows the pattern
-         ser_lines = ser_content.split('\n')
-         formatted_content = []
-
-         for line in ser_lines:
-             line = line.strip()
-             if not line:
-                 continue
-
-             # Check if line has the "Key ==> Value" pattern
-             if ' ==> ' in line:
-                 parts = line.split(' ==> ', 1)
-                 if len(parts) == 2:
-                     key = parts[0].strip()
-                     value = parts[1].strip()
-                     formatted_content.append(f'<div style="margin: 8px 0;"><strong style="color: #8e44ad;">{key}:</strong> <span style="color: #2c3e50;">{value}</span></div>')
-                 else:
-                     formatted_content.append(f'<div style="margin: 4px 0; color: #2c3e50;">{line}</div>')
-             else:
-                 formatted_content.append(f'<div style="margin: 4px 0; color: #2c3e50;">{line}</div>')
-
-         if not formatted_content:
-             formatted_content = [f'<div style="color: #2c3e50; line-height: 1.6;">{ser_content}</div>']
-
-         content_html = ''.join(formatted_content)
-
-         # Use HTML div with inline CSS for purple border styling for SER
-         return f'''
-
- <div style="border-left: 4px solid #8e44ad; background: linear-gradient(135deg, #f8f4ff 0%, #ede7f6 100%); padding: 16px 20px; margin: 16px 0; border-radius: 12px; font-family: 'Segoe UI', sans-serif; box-shadow: 0 2px 8px rgba(142, 68, 173, 0.15); border: 1px solid rgba(142, 68, 173, 0.2);">
-     <div style="color: #8e44ad; font-weight: 600; margin-bottom: 10px; display: flex; align-items: center; font-size: 14px;">
-         <span style="margin-right: 8px;">💜</span> SER (Structured Emotional Reasoning)
-     </div>
-     <div style="line-height: 1.6; font-size: 14px;">
-         {content_html}
-     </div>
- </div>
-
      '''

      formatted_text = re.sub(thinking_pattern, replace_thinking_block, formatted_text, flags=re.DOTALL)
-     formatted_text = re.sub(ser_pattern, replace_ser_block, formatted_text, flags=re.DOTALL)

      # Clean up any remaining raw tags that might not have been caught
      formatted_text = re.sub(r'</?think>', '', formatted_text)
-     formatted_text = re.sub(r'</?ser>', '', formatted_text)

      return formatted_text.strip()

  @spaces.GPU()
  def generate_response(message, history, max_tokens, temperature, top_p):
-     """Generate streaming response with improved TextStreamer"""
+     """Generate streaming response without threading"""
      global model, tokenizer
-
+
      if model is None or tokenizer is None:
          yield "Model is still loading. Please wait..."
          return
-
+
      # Prepare conversation history
      messages = []
      for user_msg, assistant_msg in history:
          messages.append({"role": "user", "content": user_msg})
          if assistant_msg:
              messages.append({"role": "assistant", "content": assistant_msg})
-
+
      # Add current message
      messages.append({"role": "user", "content": message})
-
+
      # Apply chat template
      text = tokenizer.apply_chat_template(
          messages,
          tokenize=False,
          add_generation_prompt=True
      )
-
+
      # Tokenize input
      model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-
-     # Create stream capture
-     stream_capture = StreamCapture()
-
-     # Create TextStreamer with our capture - don't skip special tokens to preserve <think> and <ser>
-     streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False)
-
-     # Temporarily redirect the streamer's output
-     original_stdout = sys.stdout
-
-     # Generation parameters
-     generation_kwargs = {
-         **model_inputs,
-         "max_new_tokens": max_tokens,
-         "temperature": temperature,
-         "top_p": top_p,
-         "do_sample": True,
-         "pad_token_id": tokenizer.eos_token_id,
-         "streamer": streamer,
-     }
-
-     # Start generation in a separate thread
-     def generate():
-         try:
-             # Redirect stdout to capture streamer output
-             sys.stdout = stream_capture
-             with torch.no_grad():
-                 model.generate(**generation_kwargs)
-         except Exception as e:
-             stream_capture.text_queue.put(f"Error: {str(e)}")
-         finally:
-             # Restore stdout
-             sys.stdout = original_stdout
-             stream_capture.text_queue.put(None)  # Signal end
-
-     thread = threading.Thread(target=generate)
-     thread.start()
-
-     # Stream the results with formatting
-     generated_text = ""
-     while True:
-         try:
-             new_text = stream_capture.text_queue.get(timeout=30)
-             if new_text is None:
-                 break
-             generated_text += new_text
-             # Format and yield the current text with <think> and <ser> blocks
-             formatted_text = format_thinking_text(generated_text)
-             yield formatted_text
-         except queue.Empty:
-             break
-
-     thread.join(timeout=1)
+
+     try:
+         with torch.no_grad():
+             # Use transformers streaming with custom approach
+             generated_text = ""
+             current_input_ids = model_inputs["input_ids"]
+             current_attention_mask = model_inputs["attention_mask"]
+
+             for _ in range(max_tokens):
+                 # Generate next token
+                 outputs = model(
+                     input_ids=current_input_ids,
+                     attention_mask=current_attention_mask,
+                     use_cache=True
+                 )
+
+                 # Get logits for the last token
+                 logits = outputs.logits[0, -1, :]
+
+                 # Apply temperature
+                 if temperature != 1.0:
+                     logits = logits / temperature
+
+                 # Apply top-p sampling
+                 if top_p < 1.0:
+                     sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+                     cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
+                     sorted_indices_to_remove = cumulative_probs > top_p
+                     sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
+                     sorted_indices_to_remove[0] = 0
+                     indices_to_remove = sorted_indices[sorted_indices_to_remove]
+                     logits[indices_to_remove] = float('-inf')
+
+                 # Sample next token
+                 probs = torch.softmax(logits, dim=-1)
+                 next_token = torch.multinomial(probs, num_samples=1)
+
+                 # Check for EOS token
+                 if next_token.item() == tokenizer.eos_token_id:
+                     break
+
+                 # Decode the new token (preserve special tokens like <think>)
+                 new_token_text = tokenizer.decode(next_token, skip_special_tokens=False)
+                 generated_text += new_token_text
+
+                 # Format and yield the current text
+                 formatted_text = format_thinking_text(generated_text)
+                 yield formatted_text
+
+                 # Update inputs for next iteration
+                 current_input_ids = torch.cat([current_input_ids, next_token.unsqueeze(0)], dim=-1)
+                 current_attention_mask = torch.cat([current_attention_mask, torch.ones((1, 1), device=model.device)], dim=-1)
+
+     except Exception as e:
+         yield f"Error generating response: {str(e)}"
+         return
+
      # Final yield with complete formatted text
-     if generated_text:
-         final_text = format_thinking_text(generated_text)
-         yield final_text
-     else:
-         yield "No response generated."
+     final_text = format_thinking_text(generated_text) if generated_text else "No response generated."
+     yield final_text

  def chat_interface(message, history, max_tokens, temperature, top_p):
-     """Main chat interface with improved streaming for messages format"""
+     """Main chat interface with improved streaming"""
      if not message.strip():
          return history, ""

-     # Add user message to history (messages format)
-     history.append({"role": "user", "content": message})
+     # Add user message to history
+     history.append([message, ""])

      # Generate response with streaming
-     # Convert messages format to tuples for generate_response compatibility
-     history_tuples = []
-     for i in range(0, len(history) - 1, 2):  # Process pairs
-         user_msg = history[i] if i < len(history) else None
-         assistant_msg = history[i + 1] if i + 1 < len(history) else None
-
-         if user_msg and user_msg.get("role") == "user":
-             user_content = user_msg.get("content", "")
-             assistant_content = assistant_msg.get("content", "") if assistant_msg and assistant_msg.get("role") == "assistant" else ""
-             history_tuples.append([user_content, assistant_content])
-
-     # Add assistant message placeholder
-     history.append({"role": "assistant", "content": ""})
-
-     # Generate response with streaming
-     for partial_response in generate_response(message, history_tuples, max_tokens, temperature, top_p):
-         history[-1]["content"] = partial_response
+     for partial_response in generate_response(message, history[:-1], max_tokens, temperature, top_p):
+         history[-1][1] = partial_response
          yield history, ""

      return history, ""
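The replacement `generate_response` above implements nucleus (top-p) filtering by hand rather than passing `top_p` to `model.generate`. A self-contained sketch of just that filtering step, runnable with plain PyTorch; the toy logits are invented for illustration:

```python
import torch

def top_p_filter(logits: torch.Tensor, top_p: float) -> torch.Tensor:
    """Keep the smallest set of tokens whose cumulative probability exceeds
    top_p; mask the rest, mirroring the logic added in generate_response."""
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
    sorted_indices_to_remove = cumulative_probs > top_p
    # Shift right so the first token that crosses the threshold is kept.
    sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
    sorted_indices_to_remove[0] = False
    filtered = logits.clone()
    filtered[sorted_indices[sorted_indices_to_remove]] = float('-inf')
    return filtered

# Toy example with a 5-token vocabulary.
logits = torch.tensor([2.0, 1.0, 0.5, 0.1, -1.0])
probs = torch.softmax(top_p_filter(logits, top_p=0.9), dim=-1)
next_token = torch.multinomial(probs, num_samples=1)  # sample from the kept mass
```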
@@ -272,499 +174,209 @@ def chat_interface(message, history, max_tokens, temperature, top_p):
  print("Initializing model...")
  load_model()

- # Custom CSS for modern, professional styling
+ # Custom CSS for better styling and thinking blocks
  custom_css = """
- /* Import Google Fonts */
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
-
- /* Global styling */
- .gradio-container {
-     font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     min-height: 100vh;
- }
-
- /* Main container styling */
- .main {
-     background: rgba(255, 255, 255, 0.95);
-     backdrop-filter: blur(20px);
-     border-radius: 24px;
-     box-shadow: 0 20px 40px rgba(0,0,0,0.1);
-     margin: 20px;
-     padding: 32px;
-     border: 1px solid rgba(255, 255, 255, 0.2);
- }
-
- /* Header styling */
- .gradio-markdown h1 {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     -webkit-background-clip: text;
-     -webkit-text-fill-color: transparent;
-     background-clip: text;
-     font-weight: 700;
-     font-size: 3rem;
-     text-align: center;
-     margin-bottom: 1rem;
-     text-shadow: 0 2px 4px rgba(0,0,0,0.1);
- }
-
- .gradio-markdown h3 {
-     color: #4a5568;
-     font-weight: 600;
-     margin-top: 1.5rem;
-     margin-bottom: 0.5rem;
- }
-
- /* Chatbot styling */
+ /* Main chatbot styling */
  .chatbot {
-     font-size: 15px;
-     font-family: 'Inter', sans-serif;
-     background: #ffffff;
-     border-radius: 20px;
-     border: 1px solid #e2e8f0;
-     box-shadow: 0 8px 32px rgba(0,0,0,0.08);
-     overflow: hidden;
- }
-
- .chatbot .message {
-     padding: 16px 20px;
-     margin: 8px 12px;
-     border-radius: 16px;
-     line-height: 1.6;
-     box-shadow: 0 2px 8px rgba(0,0,0,0.06);
-     transition: all 0.2s ease;
+     font-size: 14px;
+     font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  }

- .chatbot .message:hover {
-     transform: translateY(-1px);
-     box-shadow: 0 4px 12px rgba(0,0,0,0.1);
+ /* Enhanced thinking block styling - now handled via inline HTML */
+ .thinking-block {
+     background: linear-gradient(135deg, #f0f8ff 0%, #e6f3ff 100%);
+     border-left: 4px solid #4a90e2;
+     border-radius: 8px;
+     padding: 12px 16px;
+     margin: 12px 0;
+     font-family: 'Segoe UI', sans-serif;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+     position: relative;
  }

- /* User message styling */
- .chatbot .message.user {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     color: white;
-     margin-left: 15%;
-     border-bottom-right-radius: 6px;
-     box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3);
+ /* Support for HTML content in chatbot */
+ .chatbot .message {
+     overflow: visible;
  }

- /* Assistant message styling */
- .chatbot .message.bot {
-     background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
-     color: #2d3748;
-     margin-right: 15%;
-     border-bottom-left-radius: 6px;
-     border: 1px solid #e2e8f0;
+ .chatbot .message div {
+     max-width: none;
  }

- /* Enhanced thinking and SER block styling */
- .thinking-block, .ser-block {
+ /* Message styling */
+ .message {
+     padding: 10px 14px;
+     margin: 6px 0;
      border-radius: 12px;
-     padding: 16px 20px;
-     margin: 16px 0;
-     font-family: 'Inter', sans-serif;
-     box-shadow: 0 4px 12px rgba(0,0,0,0.08);
-     position: relative;
-     overflow: hidden;
+     line-height: 1.5;
  }

- .thinking-block::before, .ser-block::before {
-     content: '';
-     position: absolute;
-     top: 0;
-     left: 0;
-     right: 0;
-     height: 3px;
-     background: linear-gradient(90deg, #4a90e2, #357abd);
+ .user-message {
+     background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
+     margin-left: 15%;
+     border-bottom-right-radius: 4px;
  }

- /* Input styling */
- .gradio-textbox {
-     border-radius: 16px;
-     border: 2px solid #e2e8f0;
-     transition: all 0.3s ease;
-     font-family: 'Inter', sans-serif;
-     padding: 16px 20px;
-     font-size: 15px;
-     background: #ffffff;
-     box-shadow: 0 2px 8px rgba(0,0,0,0.04);
+ .assistant-message {
+     background: linear-gradient(135deg, #f5f5f5 0%, #eeeeee 100%);
+     margin-right: 15%;
+     border-bottom-left-radius: 4px;
  }

- .gradio-textbox:focus {
-     border-color: #667eea;
-     box-shadow: 0 0 0 4px rgba(102, 126, 234, 0.1);
-     outline: none;
+ /* Code block styling */
+ pre {
+     background-color: #f8f9fa;
+     border: 1px solid #e9ecef;
+     border-radius: 6px;
+     padding: 12px;
+     overflow-x: auto;
+     font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
+     font-size: 13px;
+     line-height: 1.4;
  }

  /* Button styling */
  .gradio-button {
-     border-radius: 14px;
-     font-weight: 600;
-     font-family: 'Inter', sans-serif;
-     transition: all 0.3s ease;
-     padding: 12px 24px;
-     font-size: 14px;
-     letter-spacing: 0.5px;
-     border: none;
-     cursor: pointer;
-     position: relative;
-     overflow: hidden;
- }
-
- .gradio-button.primary {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     color: white;
-     box-shadow: 0 4px 16px rgba(102, 126, 234, 0.3);
+     border-radius: 8px;
+     font-weight: 500;
+     transition: all 0.2s ease;
  }

- .gradio-button.primary:hover {
-     transform: translateY(-2px);
-     box-shadow: 0 8px 24px rgba(102, 126, 234, 0.4);
+ .gradio-button:hover {
+     transform: translateY(-1px);
+     box-shadow: 0 4px 8px rgba(0,0,0,0.15);
  }

- .gradio-button.secondary {
-     background: linear-gradient(135deg, #f7fafc 0%, #edf2f7 100%);
-     color: #4a5568;
-     border: 1px solid #e2e8f0;
+ /* Input styling */
+ .gradio-textbox {
+     border-radius: 8px;
+     border: 2px solid #e0e0e0;
+     transition: border-color 0.2s ease;
  }

- .gradio-button.secondary:hover {
-     background: linear-gradient(135deg, #edf2f7 0%, #e2e8f0 100%);
-     transform: translateY(-1px);
-     box-shadow: 0 4px 12px rgba(0,0,0,0.1);
+ .gradio-textbox:focus {
+     border-color: #4a90e2;
+     box-shadow: 0 0 0 3px rgba(74, 144, 226, 0.1);
  }

  /* Slider styling */
  .gradio-slider {
-     margin: 12px 0;
- }
-
- .gradio-slider input[type="range"] {
-     -webkit-appearance: none;
-     height: 6px;
-     border-radius: 3px;
-     background: linear-gradient(135deg, #e2e8f0 0%, #cbd5e0 100%);
-     outline: none;
- }
-
- .gradio-slider input[type="range"]::-webkit-slider-thumb {
-     -webkit-appearance: none;
-     appearance: none;
-     width: 20px;
-     height: 20px;
-     border-radius: 50%;
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     cursor: pointer;
-     box-shadow: 0 2px 8px rgba(102, 126, 234, 0.3);
-     transition: all 0.2s ease;
- }
-
- .gradio-slider input[type="range"]::-webkit-slider-thumb:hover {
-     transform: scale(1.1);
-     box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
+     margin: 8px 0;
  }

  /* Examples styling */
  .gradio-examples {
-     margin-top: 24px;
-     background: rgba(255, 255, 255, 0.7);
-     backdrop-filter: blur(10px);
-     border-radius: 16px;
-     padding: 20px;
-     border: 1px solid rgba(255, 255, 255, 0.2);
+     margin-top: 16px;
  }

  .gradio-examples .gradio-button {
-     background: rgba(255, 255, 255, 0.9);
-     border: 1px solid #e2e8f0;
-     color: #4a5568;
+     background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
+     border: 1px solid #dee2e6;
+     color: #495057;
      font-size: 13px;
-     padding: 12px 16px;
-     margin: 4px;
-     border-radius: 12px;
-     transition: all 0.2s ease;
-     backdrop-filter: blur(10px);
+     padding: 8px 12px;
  }

  .gradio-examples .gradio-button:hover {
-     background: rgba(255, 255, 255, 1);
-     color: #2d3748;
-     transform: translateY(-1px);
-     box-shadow: 0 4px 12px rgba(0,0,0,0.1);
- }
-
- /* Code block styling */
- pre {
-     background: linear-gradient(135deg, #2d3748 0%, #4a5568 100%);
-     color: #e2e8f0;
-     border-radius: 12px;
-     padding: 20px;
-     overflow-x: auto;
-     font-family: 'JetBrains Mono', 'Consolas', 'Monaco', monospace;
-     font-size: 14px;
-     line-height: 1.5;
-     box-shadow: 0 4px 16px rgba(0,0,0,0.1);
-     border: 1px solid #4a5568;
- }
-
- /* Sidebar styling */
- .gradio-column {
-     background: rgba(255, 255, 255, 0.8);
-     backdrop-filter: blur(10px);
-     border-radius: 16px;
-     padding: 20px;
-     margin: 8px;
-     border: 1px solid rgba(255, 255, 255, 0.2);
-     box-shadow: 0 4px 16px rgba(0,0,0,0.05);
- }
-
- /* Footer styling */
- .gradio-markdown hr {
-     border: none;
-     height: 1px;
-     background: linear-gradient(90deg, transparent, #e2e8f0, transparent);
-     margin: 2rem 0;
- }
-
- /* Responsive design */
- @media (max-width: 768px) {
-     .main {
-         margin: 10px;
-         padding: 20px;
-         border-radius: 16px;
-     }
-
-     .gradio-markdown h1 {
-         font-size: 2rem;
-     }
-
-     .chatbot .message.user,
-     .chatbot .message.bot {
-         margin-left: 5%;
-         margin-right: 5%;
-     }
- }
-
- /* Loading animation */
- .loading {
-     display: inline-block;
-     width: 20px;
-     height: 20px;
-     border: 3px solid rgba(102, 126, 234, 0.3);
-     border-radius: 50%;
-     border-top-color: #667eea;
-     animation: spin 1s ease-in-out infinite;
- }
-
- @keyframes spin {
-     to { transform: rotate(360deg); }
- }
-
- /* Scroll styling */
- ::-webkit-scrollbar {
-     width: 8px;
- }
-
- ::-webkit-scrollbar-track {
-     background: #f1f1f1;
-     border-radius: 4px;
- }
-
- ::-webkit-scrollbar-thumb {
-     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-     border-radius: 4px;
- }
-
- ::-webkit-scrollbar-thumb:hover {
-     background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%);
+     background: linear-gradient(135deg, #e9ecef 0%, #dee2e6 100%);
+     color: #212529;
  }
  """

- # Create Gradio interface with modern design
+ # Create Gradio interface
  with gr.Blocks(
-     title="🤖 Dhanishtha-2.0-preview | Advanced Reasoning AI",
-     theme=gr.themes.Soft(
-         primary_hue="blue",
-         secondary_hue="purple",
-         neutral_hue="slate",
-         font=gr.themes.GoogleFont("Inter"),
-         font_mono=gr.themes.GoogleFont("JetBrains Mono")
-     ),
-     css=custom_css,
-     head="<link rel='icon' href='🤖' type='image/svg+xml'>"
+     title="🤖 Dhanishtha-2.0-preview Chat",
+     theme=gr.themes.Soft(),
+     css=custom_css
  ) as demo:
-     # Header Section
-     gr.HTML("""
-     <div style="text-align: center; padding: 2rem 0; background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%); border-radius: 20px; margin-bottom: 2rem; border: 1px solid rgba(102, 126, 234, 0.2);">
-         <h1 style="margin: 0; font-size: 3.5rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800;">
-             🤖 Dhanishtha-2.0-preview
-         </h1>
-         <p style="font-size: 1.2rem; color: #64748b; margin: 1rem 0; font-weight: 500;">
-             Advanced Reasoning AI with Transparent Thinking Process
-         </p>
-         <div style="display: flex; justify-content: center; gap: 2rem; flex-wrap: wrap; margin-top: 1.5rem;">
-             <div style="background: rgba(74, 144, 226, 0.1); padding: 0.8rem 1.5rem; border-radius: 12px; border: 1px solid rgba(74, 144, 226, 0.2);">
-                 <span style="color: #4a90e2; font-weight: 600;">🧠 Multi-step Reasoning</span>
-             </div>
-             <div style="background: rgba(142, 68, 173, 0.1); padding: 0.8rem 1.5rem; border-radius: 12px; border: 1px solid rgba(142, 68, 173, 0.2);">
-                 <span style="color: #8e44ad; font-weight: 600;">💜 Emotional Intelligence</span>
-             </div>
-             <div style="background: rgba(34, 197, 94, 0.1); padding: 0.8rem 1.5rem; border-radius: 12px; border: 1px solid rgba(34, 197, 94, 0.2);">
-                 <span style="color: #22c55e; font-weight: 600;">🔄 Real-time Streaming</span>
-             </div>
-         </div>
-     </div>
-     """)
-
-     # Main Chat Interface
-     with gr.Row(equal_height=True):
-         with gr.Column(scale=4, min_width=600):
-             # Chat Area
-             with gr.Group():
-                 chatbot = gr.Chatbot(
-                     [],
-                     elem_id="chatbot",
-                     height=650,
-                     show_copy_button=True,
-                     show_share_button=True,
-                     type='messages',  # Use openai-style messages format
-                     avatar_images=(
-                         "https://raw.githubusercontent.com/gradio-app/gradio/main/gradio/themes/utils/profile_avatar.png",
-                         "🤖"
-                     ),
-                     render_markdown=True,
-                     sanitize_html=False,  # Allow HTML for thinking blocks
-                     latex_delimiters=[
-                         {"left": "$$", "right": "$$", "display": True},
-                         {"left": "$", "right": "$", "display": False}
-                     ]
-                 )
-
-             # Input Section
-             with gr.Group():
-                 with gr.Row():
-                     msg = gr.Textbox(
-                         container=False,
-                         placeholder="💭 Ask me anything! I'll show you my thinking and emotional reasoning process...",
-                         label="",
-                         autofocus=True,
-                         scale=8,
-                         lines=1,
-                         max_lines=5
-                     )
-                     with gr.Column(scale=1, min_width=120):
-                         send_btn = gr.Button(
-                             "🚀 Send",
-                             variant="primary",
-                             size="lg"
-                         )
-                         clear_btn = gr.Button(
-                             "🗑️ Clear",
-                             variant="secondary",
-                             size="sm"
-                         )
-
-         # Settings Sidebar
-         with gr.Column(scale=1, min_width=350):
-             with gr.Group():
-                 gr.HTML("""
-                 <div style="text-align: center; padding: 1rem; background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%); border-radius: 12px; margin-bottom: 1rem;">
-                     <h3 style="margin: 0; color: #667eea; font-weight: 600;">⚙️ Generation Settings</h3>
-                 </div>
-                 """)
-
-                 max_tokens = gr.Slider(
-                     minimum=1,
-                     maximum=40960,
-                     value=2048,
-                     step=1,
-                     label="🎯 Max Tokens",
-                     info="Maximum number of tokens to generate"
-                 )
-
-                 temperature = gr.Slider(
-                     minimum=0.1,
-                     maximum=2.0,
-                     value=0.7,
-                     step=0.1,
-                     label="🌡️ Temperature",
-                     info="Controls randomness in generation"
-                 )
-
-                 top_p = gr.Slider(
-                     minimum=0.1,
-                     maximum=1.0,
-                     value=0.9,
-                     step=0.05,
-                     label="🎲 Top-p (Nucleus Sampling)",
-                     info="Controls diversity of generation"
+     gr.Markdown(
+         """
+         # 🤖 Dhanishtha-2.0-preview Chat
+
+         Chat with the **HelpingAI/Dhanishtha-2.0-preview** model - The world's first LLM designed to think between responses!
+
+         ### ✨ Key Features:
+         - 🧠 **Multi-step Reasoning**: Unlike other LLMs that think once, Dhanishtha can think, rethink, self-evaluate, and refine using multiple `<think>` blocks
+         - 🔄 **Iterative Thinking**: Watch the model's thought process unfold in real-time
+         - 💡 **Enhanced Problem Solving**: Better reasoning capabilities through structured thinking
+
+         **Note**: The `<think>` blocks show the model's internal reasoning process and will be displayed in a formatted way below.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=4):
+             chatbot = gr.Chatbot(
+                 [],
+                 elem_id="chatbot",
+                 bubble_full_width=False,
+                 height=600,
+                 show_copy_button=True,
+                 show_share_button=True,
+                 avatar_images=("👤", "🤖"),
+                 render_markdown=True,
+                 sanitize_html=False,  # Allow HTML for thinking blocks
+                 latex_delimiters=[
+                     {"left": "$$", "right": "$$", "display": True},
+                     {"left": "$", "right": "$", "display": False}
+                 ]
+             )
+
+             with gr.Row():
+                 msg = gr.Textbox(
+                     container=False,
+                     placeholder="Ask me anything! I'll show you my thinking process...",
+                     label="Message",
+                     autofocus=True,
+                     scale=8,
+                     lines=1,
+                     max_lines=5
                  )
-
-     with gr.Row():
-         stop_btn = gr.Button(
-             "⏹️ Stop Generation",
-             variant="stop",
-             size="sm"
-         )
-
-     # Model Information Panel
-     with gr.Group():
-         gr.HTML("""
-         <div style="background: linear-gradient(135deg, rgba(34, 197, 94, 0.1) 0%, rgba(59, 130, 246, 0.1) 100%); border-radius: 12px; padding: 1.5rem; border: 1px solid rgba(34, 197, 94, 0.2);">
-             <h3 style="margin: 0 0 1rem 0; color: #22c55e; font-weight: 600;">📊 Model Information</h3>
-             <div style="color: #64748b; line-height: 1.6;">
-                 <strong style="color: #1e293b;">Model:</strong> HelpingAI/Dhanishtha-2.0-preview<br>
-                 <strong style="color: #1e293b;">Type:</strong> Advanced Reasoning LLM<br>
-                 <strong style="color: #1e293b;">Features:</strong> Multi-step reasoning, emotional intelligence<br>
-                 <strong style="color: #1e293b;">Special:</strong> Transparent thinking process with &lt;think&gt; and &lt;ser&gt; blocks
-             </div>
-         </div>
-         """)
-
-     # Performance Stats (placeholder)
-     with gr.Group():
-         gr.HTML("""
-         <div style="background: linear-gradient(135deg, rgba(168, 85, 247, 0.1) 0%, rgba(236, 72, 153, 0.1) 100%); border-radius: 12px; padding: 1.5rem; border: 1px solid rgba(168, 85, 247, 0.2);">
-             <h3 style="margin: 0 0 1rem 0; color: #a855f7; font-weight: 600;">⚡ Performance</h3>
-             <div style="color: #64748b; line-height: 1.6;">
-                 <strong style="color: #1e293b;">Status:</strong> <span style="color: #22c55e;">Active ✅</span><br>
-                 <strong style="color: #1e293b;">Response Mode:</strong> Streaming<br>
-                 <strong style="color: #1e293b;">Reasoning:</strong> Enhanced<br>
-                 <strong style="color: #1e293b;">Context:</strong> 8192 tokens
-             </div>
-         </div>
-         """)
-
-     # Example Prompts Section
-     with gr.Group():
-         gr.HTML("""
-         <div style="text-align: center; padding: 1.5rem; background: linear-gradient(135deg, rgba(245, 158, 11, 0.1) 0%, rgba(251, 146, 60, 0.1) 100%); border-radius: 16px; margin: 2rem 0; border: 1px solid rgba(245, 158, 11, 0.2);">
-             <h3 style="margin: 0 0 1rem 0; color: #f59e0b; font-weight: 600;">💡 Example Prompts</h3>
-             <p style="color: #64748b; margin: 0;">Try these prompts to see the thinking and emotional reasoning process in action!</p>
-         </div>
-         """)
-
-     gr.Examples(
-         examples=[
-             ["Hello! Can you introduce yourself and show me your thinking and emotional reasoning process?"],
-             ["Solve this step by step: What is 15% of 240? Show your complete reasoning."],
-             ["Explain quantum entanglement in simple terms with your thought process"],
-             ["Write a short Python function to find the factorial of a number and explain your approach"],
-             ["What are the pros and cons of renewable energy? Include your emotional perspective using SER."],
-             ["Help me understand the difference between AI and machine learning with examples"],
-             ["Create a haiku about artificial intelligence and explain your creative process"],
-             ["Explain why the sky is blue using physics principles with step-by-step thinking"],
-             ["What's your favorite type of conversation and why? Show your emotional reasoning using SER format."],
-             ["How do you handle complex ethical dilemmas? Walk me through your thinking and emotional process."],
-             ["Tell me about a time when you had to change your mind about something. Use both thinking and SER blocks."],
-             ["What makes you feel most fulfilled in conversations? Use structured emotional reasoning."]
-         ],
-         inputs=msg,
-         label="",
-         examples_per_page=6
-     )
-
+                 send_btn = gr.Button("🚀 Send", variant="primary", scale=1, size="lg")
+
+         with gr.Column(scale=1, min_width=300):
+             gr.Markdown("### ⚙️ Generation Parameters")
+
+             max_tokens = gr.Slider(
+                 minimum=50,
+                 maximum=8192,
+                 value=2048,
+                 step=50,
+                 label="🎯 Max Tokens",
+                 info="Maximum number of tokens to generate"
+             )
+
+             temperature = gr.Slider(
+                 minimum=0.1,
+                 maximum=2.0,
+                 value=0.7,
+                 step=0.1,
+                 label="🌡️ Temperature",
+                 info="Higher = more creative, Lower = more focused"
+             )
+
+             top_p = gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.9,
+                 step=0.05,
+                 label="🎲 Top-p",
+                 info="Nucleus sampling threshold"
+             )
+
+             with gr.Row():
+                 clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)
+                 stop_btn = gr.Button("⏹️ Stop", variant="stop", scale=1)
+
+             gr.Markdown("### 📊 Model Info")
+             gr.Markdown(
+                 """
+                 **Model**: HelpingAI/Dhanishtha-2.0-preview
+                 **Type**: Reasoning LLM with thinking blocks
+                 **Features**: Multi-step reasoning, self-evaluation
+                 """
+             )

  # Event handlers
  def clear_chat():
      """Clear the chat history"""
@@ -794,43 +406,50 @@ with gr.Blocks(
          show_progress=False
      )

-     # Footer Section
-     gr.HTML("""
-     <div style="text-align: center; padding: 2rem; background: linear-gradient(135deg, rgba(71, 85, 105, 0.1) 0%, rgba(100, 116, 139, 0.1) 100%); border-radius: 16px; margin-top: 2rem; border: 1px solid rgba(71, 85, 105, 0.2);">
-         <h3 style="color: #475569; font-weight: 600; margin-bottom: 1rem;">🔧 Technical Specifications</h3>
-         <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; color: #64748b; line-height: 1.6;">
-             <div>
-                 <strong style="color: #1e293b;">Model:</strong> HelpingAI/Dhanishtha-2.0-preview<br>
-                 <strong style="color: #1e293b;">Framework:</strong> Transformers + Gradio
-             </div>
-             <div>
-                 <strong style="color: #1e293b;">Features:</strong> Real-time streaming<br>
-                 <strong style="color: #1e293b;">Reasoning:</strong> Multi-step with transparency
-             </div>
-             <div>
-                 <strong style="color: #1e293b;">Special Tags:</strong> &lt;think&gt; and &lt;ser&gt; blocks<br>
-                 <strong style="color: #1e293b;">Sampling:</strong> Custom temperature & top-p
-             </div>
-         </div>
-         <hr style="border: none; height: 1px; background: linear-gradient(90deg, transparent, #e2e8f0, transparent); margin: 1.5rem 0;">
-         <p style="color: #64748b; margin: 0; font-size: 14px;">
-             🚀 <strong>Built with ❤️ using Gradio and Transformers</strong> |
-             💡 The first LLM to show transparent thinking and emotional reasoning processes
-         </p>
-     </div>
-     """)
+     # Example prompts section
+     with gr.Row():
+         gr.Examples(
+             examples=[
+                 ["Hello! Can you introduce yourself and show me how you think?"],
+                 ["Solve this step by step: What is 15% of 240?"],
+                 ["Explain quantum entanglement in simple terms"],
+                 ["Write a short Python function to find the factorial of a number"],
+                 ["What are the pros and cons of renewable energy?"],
+                 ["Help me understand the difference between AI and machine learning"],
+                 ["Create a haiku about artificial intelligence"],
+                 ["Explain why the sky is blue using physics principles"]
+             ],
+             inputs=msg,
+             label="💡 Example Prompts - Try these to see the thinking process!",
+             examples_per_page=4
+         )
+
+     # Footer with information
+     gr.Markdown(
+         """
+         ---
+         ### 🔧 Technical Details
+         - **Model**: HelpingAI/Dhanishtha-2.0-preview
+         - **Framework**: Transformers + Gradio
+         - **Features**: Real-time streaming, thinking process visualization, custom sampling
+         - **Reasoning**: Multi-step thinking with `<think>` blocks for transparent AI reasoning
+
+         **Note**: This interface streams responses token by token and formats thinking blocks for better readability.
+         The model's internal reasoning process is displayed in formatted code blocks.
+
+         ---
+         *Built with ❤️ using Gradio and Transformers*
+         """
+     )

  if __name__ == "__main__":
      demo.queue(
-         max_size=30,
-         default_concurrency_limit=2
+         max_size=20,
+         default_concurrency_limit=1
      ).launch(
          server_name="0.0.0.0",
          server_port=7860,
          share=False,
          show_error=True,
-         quiet=False,
-         favicon_path="🤖",
-         show_tips=True,
-         enable_queue=True
+         quiet=False
      )
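One caveat worth noting about the decode loop this commit adds: it passes `use_cache=True` but never feeds `outputs.past_key_values` back into the next forward pass, so every step re-encodes the entire growing prefix. A hedged sketch of the cache-reusing variant; this is not part of the commit, `past_key_values` is the standard `transformers` mechanism, and greedy decoding is used here for brevity:

```python
import torch

def stream_with_kv_cache(model, tokenizer, model_inputs, max_tokens):
    """Illustrative only: incremental decoding that reuses the KV cache,
    so each forward pass sees just the newest token after the first step."""
    input_ids = model_inputs["input_ids"]
    past_key_values = None
    generated = []
    with torch.no_grad():
        for _ in range(max_tokens):
            outputs = model(
                input_ids=input_ids,
                past_key_values=past_key_values,
                use_cache=True,
            )
            past_key_values = outputs.past_key_values  # reuse, don't recompute
            next_token = outputs.logits[0, -1, :].argmax().reshape(1, 1)
            if next_token.item() == tokenizer.eos_token_id:
                break
            generated.append(next_token.item())
            yield tokenizer.decode(generated, skip_special_tokens=False)
            input_ids = next_token  # feed back only the new token
```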
 