phani50101 committed
Commit 2ba6c15 · verified · 1 Parent(s): 7482fd2

Create app.py

Files changed (1)
  1. app.py +744 -0
app.py ADDED
@@ -0,0 +1,744 @@
import time
import gradio as gr
import pandas as pd
import openvino_genai
from huggingface_hub import snapshot_download
from threading import Lock, Event
import os
import numpy as np
import requests
from PIL import Image
from io import BytesIO
import cpuinfo
import openvino as ov
from googleapiclient.discovery import build
import gc
from PyPDF2 import PdfReader
from docx import Document
import textwrap
from queue import Queue, Empty
from concurrent.futures import ThreadPoolExecutor
from typing import Generator

# Google credentials are read from the environment so the key is not committed
# to the repository.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID", "")
DEFAULT_MAX_TOKENS = 4096
DEFAULT_NUM_IMAGES = 1
MAX_HISTORY_TURNS = 3
MAX_TOKENS_LIMIT = 4096

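# Both credentials must be present in the environment before launch, e.g.
# (shell, with your own Custom Search JSON API key and engine ID):
#
#   export GOOGLE_API_KEY="your-api-key"
#   export GOOGLE_CSE_ID="your-cse-id"
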
class UnifiedAISystem:
    def __init__(self):
        self.pipe_lock = Lock()
        self.current_df = None
        self.mistral_pipe = None
        self.internvl_pipe = None
        self.whisper_pipe = None
        self.current_document_text = None
        self.generation_executor = ThreadPoolExecutor(max_workers=3)
        self.initialize_models()

    def initialize_models(self):
        """Download (if needed) and initialize all required models."""
        if not os.path.exists("mistral-ov"):
            snapshot_download(repo_id="OpenVINO/mistral-7b-instruct-v0.1-int8-ov", local_dir="mistral-ov")
        if not os.path.exists("internvl-ov"):
            snapshot_download(repo_id="OpenVINO/InternVL2-1B-int8-ov", local_dir="internvl-ov")
        if not os.path.exists("whisper-ov-model"):
            snapshot_download(repo_id="OpenVINO/whisper-tiny-fp16-ov", local_dir="whisper-ov-model")

        # CPU flags are reported as individual features (e.g. 'avx512f',
        # 'avx512bw'), so match on the prefix rather than an exact 'avx512' entry.
        cpu_features = cpuinfo.get_cpu_info()['flags']
        config_options = {}
        if any(flag.startswith('avx512') for flag in cpu_features):
            config_options["ENFORCE_BF16"] = "YES"
        elif 'avx2' in cpu_features:
            config_options["INFERENCE_PRECISION_HINT"] = "f32"

        # Initialize Mistral model
        self.mistral_pipe = openvino_genai.LLMPipeline(
            "mistral-ov",
            device="CPU",
            config={"PERFORMANCE_HINT": "THROUGHPUT", **config_options}
        )

        # InternVL is loaded lazily on first image request; Whisper is small
        # enough to load up front.
        self.whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")

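    # Note: ENFORCE_BF16 is an older CPU-plugin property requesting bfloat16
    # execution on AVX-512 hardware, while INFERENCE_PRECISION_HINT="f32" keeps
    # AVX2-only machines in full float32; whether the legacy key is still
    # honored depends on the installed OpenVINO version.
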
    def load_data(self, file_path):
        """Load student data from a CSV or Excel file."""
        try:
            file_ext = os.path.splitext(file_path)[1].lower()
            if file_ext == '.csv':
                self.current_df = pd.read_csv(file_path)
            elif file_ext in ['.xlsx', '.xls']:
                self.current_df = pd.read_excel(file_path)
            else:
                return False, "❌ Unsupported file format. Please upload a .csv or .xlsx file."
            return True, f"✅ Loaded {len(self.current_df)} records from {os.path.basename(file_path)}"
        except Exception as e:
            return False, f"❌ Error loading file: {str(e)}"

    def extract_text_from_document(self, file_path):
        """Extract text from a PDF or DOCX document."""
        text = ""
        try:
            file_ext = os.path.splitext(file_path)[1].lower()

            if file_ext == '.pdf':
                with open(file_path, 'rb') as file:
                    pdf_reader = PdfReader(file)
                    for page in pdf_reader.pages:
                        # extract_text() can return None for image-only pages
                        text += (page.extract_text() or "") + "\n"
            elif file_ext == '.docx':
                doc = Document(file_path)
                for para in doc.paragraphs:
                    text += para.text + "\n"
            else:
                return False, "❌ Unsupported document format. Please upload PDF or DOCX."

            # Clean and format text
            text = text.replace('\x0c', '')
            text = textwrap.dedent(text)
            self.current_document_text = text
            return True, f"✅ Extracted text from {os.path.basename(file_path)}"
        except Exception as e:
            return False, f"❌ Error processing document: {str(e)}"

    def generate_text_stream(self, prompt: str, max_tokens: int) -> Generator[str, None, None]:
        """Unified text generation with queued token streaming."""
        start_time = time.time()
        response_queue = Queue()
        completion_event = Event()
        error = [None]

        optimized_config = openvino_genai.GenerationConfig(
            max_new_tokens=max_tokens,
            temperature=0.3,
            top_p=0.9,
            streaming=True,
            streaming_interval=5
        )

        def callback(tokens):
            response_queue.put("".join(tokens))
            return openvino_genai.StreamingStatus.RUNNING

        def generate():
            try:
                with self.pipe_lock:
                    self.mistral_pipe.generate(prompt, optimized_config, callback)
            except Exception as e:
                error[0] = str(e)
            finally:
                completion_event.set()

        self.generation_executor.submit(generate)

        accumulated = []
        token_count = 0
        last_gc = time.time()

        while not completion_event.is_set() or not response_queue.empty():
            if error[0]:
                yield f"❌ Error: {error[0]}"
                print(f"Stream generation time: {time.time() - start_time:.2f} seconds")
                return

            try:
                token_batch = response_queue.get(timeout=0.1)
                accumulated.append(token_batch)
                token_count += len(token_batch)
                yield "".join(accumulated)

                if time.time() - last_gc > 2.0:
                    gc.collect()
                    last_gc = time.time()
            except Empty:
                continue

        elapsed = time.time() - start_time
        print(f"Generated {token_count} tokens in {elapsed:.2f} seconds "
              f"({token_count / max(elapsed, 1e-6):.2f} tokens/sec)")
        yield "".join(accumulated)

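    # The method above is a producer/consumer handoff: generate() runs on the
    # executor thread and pushes token batches into response_queue, while the
    # loop drains the queue and re-yields the accumulated text. A minimal
    # standalone sketch of the same pattern (standard library only):
    #
    #   q, done = Queue(), Event()
    #   def produce():
    #       for part in ("Hel", "lo", "!"):
    #           q.put(part)
    #       done.set()
    #   ThreadPoolExecutor(max_workers=1).submit(produce)
    #   while not done.is_set() or not q.empty():
    #       try:
    #           print(q.get(timeout=0.1), end="")
    #       except Empty:
    #           continue
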
    def analyze_student_data(self, query, max_tokens=DEFAULT_MAX_TOKENS):
        """Analyze student data using the LLM, with streaming output."""
        if not query or not query.strip():
            yield "⚠️ Please enter a valid question"
            return

        if self.current_df is None:
            yield "⚠️ Please upload and load a student data file first"
            return

        data_summary = self._prepare_data_summary(self.current_df)
        prompt = f"""You are an expert education analyst. Analyze the following student performance data:
{data_summary}
Question: {query}
Please include:
1. A direct answer to the question
2. Relevant statistics
3. Key insights
4. Actionable recommendations
Format the output with clear headings."""

        yield from self.generate_text_stream(prompt, max_tokens)

    def _prepare_data_summary(self, df):
        """Summarize the uploaded data for the prompt."""
        summary = f"Student performance data with {len(df)} rows and {len(df.columns)} columns.\n"
        summary += "Columns: " + ", ".join(df.columns) + "\n"
        summary += "First 3 rows:\n" + df.head(3).to_string(index=False)
        return summary

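    # For a hypothetical upload with columns Name, Math, Science the summary
    # fed into the prompt would read:
    #   Student performance data with 120 rows and 3 columns.
    #   Columns: Name, Math, Science
    #   First 3 rows:
    #   ...rows rendered by DataFrame.to_string...
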
    def analyze_image(self, image, url, prompt):
        """Analyze an image with the InternVL model (synchronous, no streaming)."""
        try:
            if image is not None:
                # Force RGB so RGBA/grayscale uploads don't break the reshape below
                image_source = image.convert("RGB")
            elif url and url.startswith(("http://", "https://")):
                response = requests.get(url)
                image_source = Image.open(BytesIO(response.content)).convert("RGB")
            else:
                return "⚠️ Please upload an image or enter a valid URL"

            # Use unsigned 8-bit pixels (np.byte is signed and would overflow
            # values above 127)
            image_data = np.array(image_source.getdata()).reshape(
                1, image_source.size[1], image_source.size[0], 3
            ).astype(np.uint8)
            image_tensor = ov.Tensor(image_data)

            # Load InternVL lazily on first use to keep startup time down
            if self.internvl_pipe is None:
                self.internvl_pipe = openvino_genai.VLMPipeline("internvl-ov", device="CPU")

            with self.pipe_lock:
                self.internvl_pipe.start_chat()
                output = self.internvl_pipe.generate(prompt, image=image_tensor, max_new_tokens=100)
                self.internvl_pipe.finish_chat()

            return output
        except Exception as e:
            return f"❌ Error: {str(e)}"

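    # Shape note: for a 640x480 upload, getdata() yields 480*640 RGB triples
    # and the reshape above produces a (1, 480, 640, 3) NHWC tensor, the layout
    # the OpenVINO GenAI VLM samples pass to VLMPipeline.generate().
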
    def process_audio(self, data, sr):
        """Prepare raw microphone audio for speech recognition."""
        try:
            # Downmix stereo to mono
            if data.ndim > 1:
                data = np.mean(data, axis=1)

            # Normalize to [-1, 1]
            data = data.astype(np.float32)
            max_val = np.max(np.abs(data)) + 1e-7
            data /= max_val

            # Simple noise reduction
            data = np.clip(data, -0.5, 0.5)

            # Trim silence
            energy = np.abs(data)
            threshold = np.percentile(energy, 25)
            mask = energy > threshold
            indices = np.where(mask)[0]

            if len(indices) > 0:
                start = max(0, indices[0] - 1000)
                end = min(len(data), indices[-1] + 1000)
                data = data[start:end]

            # Resample to the 16 kHz rate Whisper expects
            if sr != 16000:
                new_length = int(len(data) * 16000 / sr)
                data = np.interp(
                    np.linspace(0, len(data) - 1, new_length),
                    np.arange(len(data)),
                    data
                )

            return data
        except Exception as e:
            print(f"Audio processing error: {e}")
            return np.array([], dtype=np.float32)

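    # Resampling example: a 2-second clip recorded at sr=44100 has 88200
    # samples and maps to int(88200 * 16000 / 44100) = 32000 output samples;
    # np.interp evaluates the signal at evenly spaced points (plain linear
    # interpolation, a dependency-free stand-in for a proper polyphase
    # resampler).
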
    def transcribe(self, audio):
        """Transcribe audio using the Whisper model, with basic error handling."""
        if audio is None:
            return ""
        sr, data = audio

        # Ignore clips shorter than half a second
        if len(data) / sr < 0.5:
            return ""

        try:
            processed = self.process_audio(data, sr)

            # Skip clips under ~0.5 s after trimming (8000 samples at 16 kHz)
            if len(processed) < 8000:
                return ""

            result = self.whisper_pipe.generate(processed)
            # Return plain text rather than the decoded-results object
            return str(result)
        except Exception as e:
            print(f"Transcription error: {e}")
            return "❌ Transcription failed - please try again"

    def generate_lesson_plan(self, topic, duration, additional_instructions="", max_tokens=DEFAULT_MAX_TOKENS):
        """Generate a lesson plan based on the uploaded document content."""
        if not topic:
            yield "⚠️ Please enter a lesson topic"
            return

        if not self.current_document_text:
            yield "⚠️ Please upload and process a document first"
            return

        prompt = f"""As an expert educator, create a focused lesson plan using the provided content.
**Core Requirements:**
1. TOPIC: {topic}
2. TOTAL DURATION: {duration} periods
3. ADDITIONAL INSTRUCTIONS: {additional_instructions or 'None'}
**Content Summary:**
{self.current_document_text[:2500]}... [truncated]
**Output Structure:**
1. PERIOD ALLOCATION (Break topic into {duration} logical segments):
   - Period 1: [Subtopic 1]
   - Period 2: [Subtopic 2]
   ...
2. LEARNING OBJECTIVES (Max 3 bullet points)
3. TEACHING ACTIVITIES (One engaging method per period)
4. RESOURCES (Key materials from document)
5. ASSESSMENT (Simple checks for understanding)
6. PAGE REFERENCES (Specific source pages)
**Key Rules:**
- Strictly divide content into exactly {duration} periods
- Prioritize document content over creativity
- Keep objectives measurable
- Use only document resources
- Make page references specific"""

        yield from self.generate_text_stream(prompt, max_tokens)

    def fetch_images(self, query: str, num: int = DEFAULT_NUM_IMAGES) -> list:
        """Fetch unique images by requesting different result pages."""
        if num <= 0:
            return []

        try:
            service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
            image_links = []
            seen_urls = set()

            for start_index in range(1, num * 2, 2):
                if len(image_links) >= num:
                    break

                res = service.cse().list(
                    q=query,
                    cx=GOOGLE_CSE_ID,
                    searchType="image",
                    num=1,
                    start=start_index
                ).execute()

                if "items" in res and res["items"]:
                    item = res["items"][0]
                    # Skip duplicates
                    if item["link"] not in seen_urls:
                        image_links.append(item["link"])
                        seen_urls.add(item["link"])

            return image_links[:num]
        except Exception as e:
            print(f"Error in image fetching: {e}")
            return []

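    # Pagination note: the Custom Search API's `start` parameter is 1-based, so
    # range(1, num * 2, 2) asks for result positions 1, 3, 5, ... one item per
    # request, trading extra round-trips for fewer duplicate links.
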
ai_system = UnifiedAISystem()

css = """
:root {
    --bg: #0D0D0D;
    --surface: #1F1F1F;
    --primary: #BB86FC;
    --secondary: #03DAC6;
    --accent: #CF6679;
    --success: #4CAF50;
    --warning: #FFB300;
    --text: #FFFFFF;
    --subtext: #B0B0B0;
    --divider: #333333;
}
body, .gradio-container { background: var(--bg); color: var(--text); }
.user-msg,
.bot-msg,
.upload-box,
#question-input,
.mode-checkbox,
.system-info,
.lesson-plan { background: var(--surface); border-radius: 8px; color: var(--text); }
.user-msg,
.bot-msg { padding: 12px 16px; margin: 8px 0; line-height: 1.5; border-left: 4px solid var(--primary); box-shadow: 0 2px 6px rgba(0,0,0,0.5); }
.bot-msg { border-color: var(--secondary); }
.upload-box { padding: 16px; margin-bottom: 16px; border: 1px solid var(--divider); }
#question-input,
.mode-checkbox { padding: 12px; border: 1px solid var(--divider); }
.slider-container { margin: 20px 0; padding: 15px; border-radius: 10px; background: var(--secondary); }
.system-info { padding: 15px; margin: 15px 0; border-left: 4px solid var(--primary); }
.chat-image { max-height: 100px; margin: 4px; border-radius: 8px; box-shadow: 0 2px 6px rgba(0,0,0,0.5); cursor: pointer; transition: transform .2s; }
.chat-image:hover { transform: scale(1.05); box-shadow: 0 4px 10px rgba(0,0,0,0.7); }
.modal { position: fixed; inset: 0; background: rgba(0,0,0,0.9); display: none; cursor: zoom-out; }
.modal-content { position: absolute; top: 50%; left: 50%; transform: translate(-50%,-50%); max-width: 90%; max-height: 90%; padding: 10px; border-radius: 12px; background: var(--surface); }
.modal-img { max-width: 100%; max-height: 100%; border-radius: 8px; }
.typing-indicator { display: inline-block; position: relative; width: 40px; height: 20px; }
.typing-dot { width: 6px; height: 6px; border-radius: 50%; background: var(--text); position: absolute; animation: typing 1.4s infinite ease-in-out; }
.typing-dot:nth-child(1) { left: 0; }
.typing-dot:nth-child(2) { left: 12px; animation-delay: .2s; }
.typing-dot:nth-child(3) { left: 24px; animation-delay: .4s; }
@keyframes typing { 0%, 60%, 100% { transform: translateY(0); } 30% { transform: translateY(-5px); } }
.lesson-title { font-size: 1.2em; font-weight: bold; color: var(--primary); margin-bottom: 8px; }
.page-ref { display: inline-block; padding: 3px 8px; margin: 3px; border-radius: 4px; background: var(--primary); color: var(--text); font-size: .9em; }
/* Scrollbar */
.chatbot::-webkit-scrollbar { width: 8px; }
.chatbot::-webkit-scrollbar-track { background: var(--surface); border-radius: 4px; }
.chatbot::-webkit-scrollbar-thumb { background: var(--primary); border-radius: 4px; }
.chatbot::-webkit-scrollbar-thumb:hover { background: var(--secondary); }
"""

with gr.Blocks(css=css, title="Unified EDU Assistant") as demo:
    gr.Markdown("# 🤖 Unified EDU Assistant by Phanindra Reddy K")

    gr.HTML("""
    <div class="system-info">
        <strong>Multi-Modal AI Assistant</strong>
        <ul>
            <li>Text & Voice Chat with Mistral-7B</li>
            <li>Image Understanding with InternVL</li>
            <li>Student Data Analysis</li>
            <li>Visual Search with Google Images</li>
            <li>Lesson Planning from Documents</li>
        </ul>
    </div>
    """)

    modal_html = """
    <div class="modal" id="imageModal" onclick="this.style.display='none'">
        <div class="modal-content">
            <img class="modal-img" id="expandedImg">
        </div>
    </div>
    <script>
    function showImage(url) {
        document.getElementById('expandedImg').src = url;
        document.getElementById('imageModal').style.display = 'block';
    }
    </script>
    """
    gr.HTML(modal_html)

    chat_state = gr.State([])
    with gr.Column(scale=2, elem_classes="chat-container"):
        chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False,
                             avatar_images=("user.png", "bot.png"), show_label=False)

    with gr.Row():
        chat_mode = gr.Checkbox(label="💬 General Chat", value=True, elem_classes="mode-checkbox")
        student_mode = gr.Checkbox(label="🎓 Student Analytics", value=False, elem_classes="mode-checkbox")
        image_mode = gr.Checkbox(label="🖼️ Image Analysis", value=False, elem_classes="mode-checkbox")
        lesson_mode = gr.Checkbox(label="📝 Lesson Planning", value=False, elem_classes="mode-checkbox")

    with gr.Column() as chat_inputs:
        include_images = gr.Checkbox(label="Include Visuals", value=True)
        user_input = gr.Textbox(
            placeholder="Type your question here...",
            label="Your Question",
            container=False,
            elem_id="question-input"
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=10,
                maximum=MAX_TOKENS_LIMIT,
                value=2048,
                step=100,
                label="Response Length (Tokens)"
            )
            num_images = gr.Slider(
                minimum=0,
                maximum=5,
                value=1,
                step=1,
                label="Number of Images",
                visible=True
            )

    with gr.Column(visible=False) as student_inputs:
        file_upload = gr.File(label="CSV/Excel File", file_types=[".csv", ".xlsx"], type="filepath")
        student_question = gr.Textbox(
            placeholder="Ask questions about student data...",
            label="Your Question",
            elem_id="question-input"
        )
        student_status = gr.Markdown("No file loaded")

    with gr.Column(visible=False) as image_inputs:
        image_upload = gr.Image(type="pil", label="Upload Image")
        image_url = gr.Textbox(
            label="OR Enter Image URL",
            placeholder="https://example.com/image.jpg",
            elem_id="question-input"
        )
        image_question = gr.Textbox(
            placeholder="Ask questions about the image...",
            label="Your Question",
            elem_id="question-input"
        )

    with gr.Column(visible=False) as lesson_inputs:
        gr.Markdown("### 📚 Lesson Planning")
        doc_upload = gr.File(
            label="Upload Curriculum Document (PDF/DOCX)",
            file_types=[".pdf", ".docx"],
            type="filepath"
        )
        doc_status = gr.Markdown("No document uploaded")

        with gr.Row():
            topic_input = gr.Textbox(
                label="Lesson Topic",
                placeholder="Enter the main topic for the lesson plan"
            )
            duration_input = gr.Number(
                label="Total Periods",
                value=5,
                minimum=1,
                maximum=20,
                step=1
            )

        additional_instructions = gr.Textbox(
            label="Additional Requirements (optional)",
            placeholder="Specific teaching methods, resources, or special considerations..."
        )

        generate_btn = gr.Button("Generate Lesson Plan", variant="primary")

    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
        mic_btn = gr.Button("Transcribe Voice", variant="secondary")
        mic = gr.Audio(sources=["microphone"], type="numpy", label="Voice Input")

    def toggle_modes(chat, student, image, lesson):
        return [
            gr.update(visible=chat),
            gr.update(visible=student),
            gr.update(visible=image),
            gr.update(visible=lesson)
        ]

    def load_student_file(file_path):
        success, message = ai_system.load_data(file_path)
        return message

    def process_document(file_path):
        if not file_path:
            return "⚠️ Please select a document first"
        success, message = ai_system.extract_text_from_document(file_path)
        return message

    def render_history(history):
        """Render chat history with images and proper formatting."""
        rendered = []
        for user_msg, bot_msg, image_links in history:
            user_html = f"<div class='user-msg'>{user_msg}</div>"

            bot_text = str(bot_msg)
            if "Lesson Plan:" in bot_text:
                bot_html = f"<div class='lesson-plan'>{bot_text}</div>"
            else:
                bot_html = f"<div class='bot-msg'>{bot_text}</div>"

            # Add images if available
            if image_links:
                images_html = "".join(
                    f"<img src='{url}' class='chat-image' onclick='showImage(\"{url}\")' />"
                    for url in image_links
                )
                bot_html += f"<br><br><b>📸 Related Visuals:</b><br><div style='display: flex; flex-wrap: wrap;'>{images_html}</div>"

            rendered.append((user_html, bot_html))
        return rendered

    def respond(message, history, chat, student, image, lesson,
                tokens, student_q, image_q, image_upload, image_url,
                include_visuals, num_imgs, topic, duration, additional):
        """
        1. Pick actual_message based on the active mode instead of the raw `message`.
        2. Convert any non-string bot response (like VLMDecodedResults) to str().
        3. Disable the input box during streaming, then re-enable it at the end.
        """
        updated_history = list(history)

        if student:
            actual_message = student_q
        elif image:
            actual_message = image_q
        elif lesson:
            actual_message = f"Generate lesson plan for: {topic} ({duration} periods)"
            if additional:
                actual_message += f"\nAdditional: {additional}"
        else:
            actual_message = message

        typing_html = "<div class='typing-indicator'><div class='typing-dot'></div><div class='typing-dot'></div><div class='typing-dot'></div></div>"
        updated_history.append((actual_message, typing_html, []))

        yield render_history(updated_history), gr.update(value="", interactive=False), updated_history

        full_response = ""
        images = []

        try:
            if chat:
                for chunk in ai_system.generate_text_stream(actual_message, tokens):
                    full_response = chunk
                    updated_history[-1] = (actual_message, full_response, [])
                    yield render_history(updated_history), gr.update(value="", interactive=False), updated_history

                if include_visuals:
                    images = ai_system.fetch_images(actual_message, num_imgs)

            elif student:
                if ai_system.current_df is None:
                    full_response = "⚠️ Please upload a student data file first"
                else:
                    for chunk in ai_system.analyze_student_data(student_q, tokens):
                        full_response = chunk
                        updated_history[-1] = (actual_message, full_response, [])
                        yield render_history(updated_history), gr.update(value="", interactive=False), updated_history

            elif image:
                if (not image_upload) and (not image_url):
                    full_response = "⚠️ Please upload an image or enter a URL"
                else:
                    result_obj = ai_system.analyze_image(image_upload, image_url, image_q)
                    full_response = str(result_obj)

            elif lesson:
                if not topic:
                    full_response = "⚠️ Please enter a lesson topic"
                else:
                    duration = int(duration) if duration else 5
                    for chunk in ai_system.generate_lesson_plan(topic, duration, additional, tokens):
                        full_response = chunk
                        updated_history[-1] = (actual_message, full_response, [])
                        yield render_history(updated_history), gr.update(value="", interactive=False), updated_history

            updated_history[-1] = (actual_message, full_response, images)
            if len(updated_history) > MAX_HISTORY_TURNS:
                updated_history = updated_history[-MAX_HISTORY_TURNS:]

        except Exception as e:
            error_msg = f"❌ Error: {str(e)}"
            updated_history[-1] = (actual_message, error_msg, [])

        yield render_history(updated_history), gr.update(value="", interactive=True), updated_history

    # Voice transcription
    def transcribe_audio(audio):
        return ai_system.transcribe(audio)

    # Mode toggles: each checkbox shows its own input column and hides the others
    mode_inputs = [chat_mode, student_mode, image_mode, lesson_mode]
    mode_outputs = [chat_inputs, student_inputs, image_inputs, lesson_inputs]
    chat_mode.change(fn=toggle_modes, inputs=mode_inputs, outputs=mode_outputs)
    student_mode.change(fn=toggle_modes, inputs=mode_inputs, outputs=mode_outputs)
    image_mode.change(fn=toggle_modes, inputs=mode_inputs, outputs=mode_outputs)
    lesson_mode.change(fn=toggle_modes, inputs=mode_inputs, outputs=mode_outputs)

    # File upload handler
    file_upload.change(fn=load_student_file, inputs=file_upload, outputs=student_status)

    # Document upload handler
    doc_upload.change(fn=process_document, inputs=doc_upload, outputs=doc_status)

    mic_btn.click(fn=transcribe_audio, inputs=mic, outputs=user_input)

    # Submit handler
    submit_btn.click(
        fn=respond,
        inputs=[
            user_input, chat_state, chat_mode, student_mode, image_mode, lesson_mode,
            max_tokens, student_question, image_question, image_upload, image_url,
            include_images, num_images,
            topic_input, duration_input, additional_instructions
        ],
        outputs=[chatbot, user_input, chat_state]
    )

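    # The lesson-plan button reuses respond(); the hidden dummy components below
    # stand in for the chat- and image-mode inputs that lesson mode ignores
    # (this relies on Gradio accepting unrendered components as event inputs).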
    generate_btn.click(
        fn=respond,
        inputs=[
            gr.Textbox(value="Generate lesson plan", visible=False),
            chat_state,
            chat_mode, student_mode, image_mode, lesson_mode,
            max_tokens,
            gr.Textbox(visible=False),
            gr.Textbox(visible=False),
            gr.Image(visible=False),
            gr.Textbox(visible=False),
            gr.Checkbox(visible=False),
            gr.Slider(visible=False),
            topic_input,
            duration_input,
            additional_instructions
        ],
        outputs=[chatbot, user_input, chat_state]
    )

if __name__ == "__main__":
    demo.launch(share=True, debug=True)