phani50101 committed
Commit cff6fb6 · verified · 1 Parent(s): b02b868

new version

Files changed (2):
  1. app.py +879 -126
  2. requirements.txt +15 -6
app.py CHANGED
@@ -1,126 +1,879 @@
- from huggingface_hub import snapshot_download
-
- # Download models from Hugging Face to local folders
- snapshot_download(
-     repo_id="OpenVINO/Mistral-7B-Instruct-v0.2-int4-ov",
-     local_dir="mistral-ov"
- )
- snapshot_download(
-     repo_id="OpenVINO/whisper-tiny-fp16-ov",
-     local_dir="whisper-ov-model"
- )
-
- import gradio as gr
- import openvino_genai
- import librosa
- import numpy as np
- from threading import Thread, Lock, Event
- from scipy.ndimage import uniform_filter1d
- from queue import Queue, Empty
-
- # Initialize Mistral pipeline
- mistral_pipe = openvino_genai.LLMPipeline("mistral-ov", device="CPU")
- config = openvino_genai.GenerationConfig(
-     max_new_tokens=100,
-     num_beams=1,
-     do_sample=False,
-     temperature=0.0,
-     top_p=1.0,
-     top_k=50
- )
- pipe_lock = Lock()
-
- # Initialize Whisper pipeline
- whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")
-
- def process_audio(data, sr):
-     """Audio processing with silence trimming"""
-     data = librosa.to_mono(data.T) if data.ndim > 1 else data
-     data = data.astype(np.float32)
-     data /= np.max(np.abs(data))
-
-     # Voice activity detection
-     frame_length, hop_length = 2048, 512
-     rms = librosa.feature.rms(y=data, frame_length=frame_length, hop_length=hop_length)[0]
-     smoothed_rms = uniform_filter1d(rms, size=5)
-     speech_frames = np.where(smoothed_rms > 0.025)[0]
-
-     if not speech_frames.size:
-         return None
-
-     start = max(0, int(speech_frames[0] * hop_length - 0.1*sr))
-     end = min(len(data), int((speech_frames[-1]+1) * hop_length + 0.1*sr))
-     return data[start:end]
-
- def transcribe(audio):
-     """Audio to text transcription"""
-     sr, data = audio
-     processed = process_audio(data, sr)
-     if processed is None or len(processed) < 1600:
-         return ""
-
-     if sr != 16000:
-         processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)
-
-     return whisper_pipe.generate(processed)
-
- def stream_generator(message, history):
-     response_queue = Queue()
-     completion_event = Event()
-     error_message = [None]
-
-     def callback(token):
-         response_queue.put(token)
-         return openvino_genai.StreamingStatus.RUNNING
-
-     def generate():
-         try:
-             with pipe_lock:
-                 mistral_pipe.generate(message, config, callback)
-         except Exception as e:
-             error_message[0] = str(e)
-         finally:
-             completion_event.set()
-
-     Thread(target=generate, daemon=True).start()
-
-     accumulated = []
-     while not completion_event.is_set() or not response_queue.empty():
-         if error_message[0]:
-             yield f"Error: {error_message[0]}"
-             return
-
-         try:
-             token = response_queue.get_nowait()
-             accumulated.append(token)
-             yield "".join(accumulated)
-         except Empty:
-             continue
-
-     yield "".join(accumulated)
-
- with gr.Blocks() as demo:
-     chat_interface = gr.ChatInterface(
-         stream_generator,
-         textbox=gr.Textbox(placeholder="Ask Mistral...", container=False),
-         title="EDU CHAT BY PHANINDRA REDDY K",
-         examples=[
-             "Explain quantum physics simply",
-             "Write a haiku about technology",
-             "What's the meaning of life?"
-         ],
-         cache_examples=False,
-     )
-
-     with gr.Row():
-         audio = gr.Audio(sources=["microphone"], type="numpy", label="Voice Input")
-         transcribe_btn = gr.Button("Send Transcription")
-
-     transcribe_btn.click(
-         transcribe,
-         inputs=audio,
-         outputs=chat_interface.textbox
-     )
-
- if __name__ == "__main__":
-     demo.launch(share=True, debug=True)
+ import time
+ import gradio as gr
+ import pandas as pd
+ import openvino_genai
+ from huggingface_hub import snapshot_download
+ from threading import Thread, Lock, Event
+ from queue import Queue, Empty
+ import os
+ import numpy as np
+ import requests
+ from PIL import Image
+ from io import BytesIO
+ import cpuinfo
+ import openvino as ov
+ import librosa
+ from googleapiclient.discovery import build
+ import gc
+ import tempfile
+ from PyPDF2 import PdfReader
+ from docx import Document
+ import textwrap
+
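+ # NOTE: the Google credentials below are committed in plain text. Reading them
+ # from the environment instead (e.g. os.environ.get("GOOGLE_API_KEY")) would
+ # let the key be rotated or revoked without a code change.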
+ # Google API configuration
+ GOOGLE_API_KEY = "AIzaSyAo-1iW5MEZbc53DlEldtnUnDaYuTHUDH4"
+ GOOGLE_CSE_ID = "3027bedf3c88a4efb"
+ DEFAULT_MAX_TOKENS = 100
+ DEFAULT_NUM_IMAGES = 1
+ MAX_HISTORY_TURNS = 3
+ MAX_TOKENS_LIMIT = 1000
+
+ class UnifiedAISystem:
+     def __init__(self):
+         self.pipe_lock = Lock()
+         self.current_df = None
+         self.mistral_pipe = None
+         self.internvl_pipe = None
+         self.whisper_pipe = None
+         self.current_document_text = None  # Store document content
+         self.initialize_models()
+
+     def initialize_models(self):
+         """Initialize all required models"""
+         # Download models if not already present locally
+         if not os.path.exists("mistral-ov"):
+             snapshot_download(repo_id="OpenVINO/mistral-7b-instruct-v0.1-int8-ov", local_dir="mistral-ov")
+         if not os.path.exists("internvl-ov"):
+             snapshot_download(repo_id="OpenVINO/InternVL2-1B-int8-ov", local_dir="internvl-ov")
+         if not os.path.exists("whisper-ov-model"):
+             snapshot_download(repo_id="OpenVINO/whisper-tiny-fp16-ov", local_dir="whisper-ov-model")
+
+         # CPU-specific configuration
+         cpu_features = cpuinfo.get_cpu_info()['flags']
+         config_options = {}
+         # Flags are reported with specific names (e.g. 'avx512f'), so match by prefix
+         if any(flag.startswith('avx512') for flag in cpu_features):
+             config_options["ENFORCE_BF16"] = "YES"
+         elif 'avx2' in cpu_features:
+             config_options["INFERENCE_PRECISION_HINT"] = "f32"
+
+         # Initialize Mistral model
+         self.mistral_pipe = openvino_genai.LLMPipeline(
+             "mistral-ov",
+             device="CPU",
+             config={"PERFORMANCE_HINT": "THROUGHPUT", **config_options}
+         )
+
+         # Initialize Whisper for audio processing
+         self.whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")
+
+     def load_data(self, file_path):
+         """Load student data from file"""
+         try:
+             file_ext = os.path.splitext(file_path)[1].lower()
+             if file_ext == '.csv':
+                 self.current_df = pd.read_csv(file_path)
+             elif file_ext in ['.xlsx', '.xls']:
+                 self.current_df = pd.read_excel(file_path)
+             else:
+                 return False, "❌ Unsupported file format. Please upload a .csv or .xlsx file."
+             return True, f"✅ Loaded {len(self.current_df)} records from {os.path.basename(file_path)}"
+         except Exception as e:
+             return False, f"❌ Error loading file: {str(e)}"
+
+     def extract_text_from_document(self, file_path):
+         """Extract text from PDF or DOCX documents"""
+         text = ""
+         try:
+             file_ext = os.path.splitext(file_path)[1].lower()
+
+             if file_ext == '.pdf':
+                 with open(file_path, 'rb') as file:
+                     pdf_reader = PdfReader(file)
+                     for page in pdf_reader.pages:
+                         text += page.extract_text() + "\n"
+
+             elif file_ext == '.docx':
+                 doc = Document(file_path)
+                 for para in doc.paragraphs:
+                     text += para.text + "\n"
+
+             else:
+                 return False, "❌ Unsupported document format. Please upload PDF or DOCX."
+
+             # Clean and format text
+             text = text.replace('\x0c', '')  # Remove form feed characters
+             text = textwrap.dedent(text)  # Remove common leading whitespace
+             self.current_document_text = text
+             return True, f"✅ Extracted text from {os.path.basename(file_path)}"
+
+         except Exception as e:
+             return False, f"❌ Error processing document: {str(e)}"
+
+     def analyze_student_data(self, query):
+         """Analyze student data using AI with streaming"""
+         if not query or not query.strip():
+             yield "⚠️ Please enter a valid question"
+             return
+
+         if self.current_df is None:
+             yield "⚠️ Please upload and load a student data file first"
+             return
+
+         data_summary = self._prepare_data_summary(self.current_df)
+         prompt = f"""You are an expert education analyst. Analyze the following student performance data:
+ {data_summary}
+
+ Question: {query}
+
+ Please include:
+ 1. Direct answer to the question
+ 2. Relevant statistics
+ 3. Key insights
+ 4. Actionable recommendations
+
+ Format the output with clear headings"""
+
+         optimized_config = openvino_genai.GenerationConfig(
+             max_new_tokens=500,
+             temperature=0.3,
+             top_p=0.9
+         )
+
+         # Tokens arrive through the streamer callback; see _stream_generate below
+         try:
+             yield from self._stream_generate(prompt, optimized_config)
+         except Exception as e:
+             yield f"❌ Error during analysis: {str(e)}"
+
+     def _stream_generate(self, prompt, config):
+         """Run generate() on a worker thread and relay streamed tokens via a queue.
+
+         openvino_genai delivers tokens to a streamer callback rather than
+         returning an iterator, so the callback feeds a queue that this
+         generator drains while yielding the accumulated text.
+         """
+         token_queue = Queue()
+         done = Event()
+         error = [None]
+
+         def streamer(token):
+             token_queue.put(token)
+             return openvino_genai.StreamingStatus.RUNNING
+
+         def worker():
+             try:
+                 with self.pipe_lock:
+                     self.mistral_pipe.generate(prompt, config, streamer)
+             except Exception as e:
+                 error[0] = str(e)
+             finally:
+                 done.set()
+
+         Thread(target=worker, daemon=True).start()
+         accumulated = ""
+         while not done.is_set() or not token_queue.empty():
+             try:
+                 accumulated += token_queue.get(timeout=0.1)
+                 yield accumulated
+             except Empty:
+                 continue
+         if error[0]:
+             raise RuntimeError(error[0])
+
+     def _prepare_data_summary(self, df):
+         """Summarize the uploaded data"""
+         summary = f"Student performance data with {len(df)} rows and {len(df.columns)} columns.\n"
+         summary += "Columns: " + ", ".join(df.columns) + "\n"
+         summary += "First 3 rows:\n" + df.head(3).to_string(index=False)
+         return summary
+
+     def analyze_image(self, image, url, prompt):
+         """Analyze image with InternVL model"""
+         try:
+             if image is not None:
+                 # Ensure 3 channels so the reshape below matches (H, W, 3)
+                 image_source = image.convert("RGB")
+             elif url and url.startswith(("http://", "https://")):
+                 response = requests.get(url)
+                 image_source = Image.open(BytesIO(response.content)).convert("RGB")
+             else:
+                 return "⚠️ Please upload an image or enter a valid URL"
+
+             # Convert to OpenVINO tensor
+             image_data = np.array(image_source.getdata()).reshape(
+                 1, image_source.size[1], image_source.size[0], 3
+             ).astype(np.byte)
+             image_tensor = ov.Tensor(image_data)
+
+             # Lazy initialize InternVL
+             if self.internvl_pipe is None:
+                 self.internvl_pipe = openvino_genai.VLMPipeline("internvl-ov", device="CPU")
+
+             with self.pipe_lock:
+                 self.internvl_pipe.start_chat()
+                 output = self.internvl_pipe.generate(prompt, image=image_tensor, max_new_tokens=100)
+                 self.internvl_pipe.finish_chat()
+
+             return output
+         except Exception as e:
+             return f"❌ Error: {str(e)}"
+
+     def process_audio(self, data, sr):
+         """Process audio data for speech recognition"""
+         try:
+             # Convert to mono
+             if data.ndim > 1:
+                 data = np.mean(data, axis=1)  # Simple mono conversion
+
+             # Convert to float32 and normalize
+             data = data.astype(np.float32)
+             max_val = np.max(np.abs(data)) + 1e-7
+             data /= max_val
+
+             # Simple noise reduction
+             data = np.clip(data, -0.5, 0.5)
+
+             # Trim silence
+             energy = np.abs(data)
+             threshold = np.percentile(energy, 25)  # Simple threshold
+             mask = energy > threshold
+             indices = np.where(mask)[0]
+
+             if len(indices) > 0:
+                 start = max(0, indices[0] - 1000)
+                 end = min(len(data), indices[-1] + 1000)
+                 data = data[start:end]
+
+             # Resample if needed using simpler method
+             if sr != 16000:
+                 # Calculate new length
+                 new_length = int(len(data) * 16000 / sr)
+                 # Linear interpolation for resampling
+                 data = np.interp(
+                     np.linspace(0, len(data)-1, new_length),
+                     np.arange(len(data)),
+                     data
+                 )
+                 sr = 16000
+
+             return data
+         except Exception as e:
+             print(f"Audio processing error: {e}")
+             return np.array([], dtype=np.float32)
+
+     def transcribe(self, audio):
+         """Transcribe audio using Whisper model with improved error handling"""
+         if audio is None:
+             return ""
+         sr, data = audio
+
+         # Skip if audio is too short (less than 0.5 seconds)
+         if len(data)/sr < 0.5:
+             return ""
+
+         try:
+             processed = self.process_audio(data, sr)
+
+             # Skip if audio is still too short after processing
+             if len(processed) < 8000:  # 0.5 seconds at 16kHz
+                 return ""
+
+             # Use OpenVINO Whisper pipeline
+             result = self.whisper_pipe.generate(processed)
+             return result
+         except Exception as e:
+             print(f"Transcription error: {e}")
+             return "❌ Transcription failed - please try again"
+
+     def generate_lesson_plan(self, topic, duration, additional_instructions=""):
+         """Generate a lesson plan based on document content"""
+         if not self.current_document_text:
+             return "⚠️ Please upload and process a document first"
+
+         prompt = f"""As an expert educator, create a focused lesson plan using the provided content.
+
+ **Core Requirements:**
+ 1. TOPIC: {topic}
+ 2. TOTAL DURATION: {duration} periods
+ 3. ADDITIONAL INSTRUCTIONS: {additional_instructions or 'None'}
+
+ **Content Summary:**
+ {self.current_document_text[:2500]}... [truncated]
+
+ **Output Structure:**
+ 1. PERIOD ALLOCATION (Break topic into {duration} logical segments):
+    - Period 1: [Subtopic 1]
+    - Period 2: [Subtopic 2]
+    ...
+
+ 2. LEARNING OBJECTIVES (Max 3 bullet points)
+ 3. TEACHING ACTIVITIES (One engaging method per period)
+ 4. RESOURCES (Key materials from document)
+ 5. ASSESSMENT (Simple checks for understanding)
+ 6. PAGE REFERENCES (Specific source pages)
+
+ **Key Rules:**
+ - Strictly divide content into exactly {duration} periods
+ - Prioritize document content over creativity
+ - Keep objectives measurable
+ - Use only document resources
+ - Make page references specific"""
+
+         optimized_config = openvino_genai.GenerationConfig(
+             max_new_tokens=1200,
+             temperature=0.4,
+             top_p=0.85
+         )
+
+         try:
+             with self.pipe_lock:
+                 return self.mistral_pipe.generate(prompt, optimized_config)
+         except Exception as e:
+             return f"❌ Error generating lesson plan: {str(e)}"
+
+     def fetch_images(self, query: str, num: int = DEFAULT_NUM_IMAGES) -> list:
+         """Fetch unique images by requesting different result pages"""
+         if num <= 0:
+             return []
+
+         try:
+             service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
+             image_links = []
+             seen_urls = set()  # To track unique URLs
+
+             # Start from different positions to get unique images
+             for start_index in range(1, num * 2, 2):
+                 if len(image_links) >= num:
+                     break
+
+                 res = service.cse().list(
+                     q=query,
+                     cx=GOOGLE_CSE_ID,
+                     searchType="image",
+                     num=1,
+                     start=start_index
+                 ).execute()
+
+                 if "items" in res and res["items"]:
+                     item = res["items"][0]
+                     # Skip duplicates
+                     if item["link"] not in seen_urls:
+                         image_links.append(item["link"])
+                         seen_urls.add(item["link"])
+
+             return image_links[:num]
+         except Exception as e:
+             print(f"Error in image fetching: {e}")
+             return []
+
+     def stream_answer(self, message: str, max_tokens: int):
+         """Stream tokens with typing indicator"""
+         optimized_config = openvino_genai.GenerationConfig(
+             max_new_tokens=max_tokens,
+             temperature=0.7,
+             top_p=0.9
+         )
+
+         try:
+             # Reuse the callback-to-queue relay defined in _stream_generate
+             for full_response in self._stream_generate(message, optimized_config):
+                 yield full_response
+                 # Periodic garbage collection
+                 if len(full_response) % 20 == 0:
+                     gc.collect()
+         except Exception as e:
+             yield f"❌ Error: {str(e)}"
+
+ # Initialize global object
+ ai_system = UnifiedAISystem()
+
+ # CSS styles with improved output box
+ css = """
+ .gradio-container {
+     background-color: #121212;
+     color: #fff;
+ }
+ .user-msg, .bot-msg {
+     padding: 12px 16px;
+     border-radius: 18px;
+     margin: 8px 0;
+     line-height: 1.5;
+     border: none;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ }
+ .user-msg {
+     background: linear-gradient(135deg, #4a5568, #2d3748);
+     color: white;
+     margin-left: 20%;
+     border-bottom-right-radius: 5px;
+     border: none;
+ }
+ .bot-msg {
+     background: linear-gradient(135deg, #2d3748, #1a202c);
+     color: white;
+     margin-right: 20%;
+     border-bottom-left-radius: 5px;
+     border: none;
+ }
+ /* Remove top border from chat messages */
+ .user-msg, .bot-msg {
+     border-top: none !important;
+ }
+ /* Remove borders from chat container */
+ .chatbot > div {
+     border: none !important;
+ }
+ .chatbot .message {
+     border: none !important;
+ }
+ /* Improve scrollbar */
+ .chatbot::-webkit-scrollbar {
+     width: 8px;
+ }
+ .chatbot::-webkit-scrollbar-track {
+     background: #2a2a2a;
+     border-radius: 4px;
+ }
+ .chatbot::-webkit-scrollbar-thumb {
+     background: #4a5568;
+     border-radius: 4px;
+ }
+ .chatbot::-webkit-scrollbar-thumb:hover {
+     background: #5a6578;
+ }
+ /* Rest of the CSS remains the same */
+ .gradio-container {
+     background-color: #121212;
+     color: #fff;
+ }
+ .upload-box {
+     background-color: #333;
+     border-radius: 8px;
+     padding: 16px;
+     margin-bottom: 16px;
+ }
+ #question-input {
+     background-color: #333;
+     color: #fff;
+     border-radius: 8px;
+     padding: 12px;
+     border: 1px solid #555;
+ }
+ .mode-checkbox {
+     background-color: #333;
+     color: #fff;
+     border: 1px solid #555;
+     border-radius: 8px;
+     padding: 10px;
+     margin: 5px;
+ }
+ .slider-container {
+     margin-top: 20px;
+     padding: 15px;
+     border-radius: 10px;
+     background-color: #2a2a2a;
+ }
+ .system-info {
+     background-color: #7B9BDB;
+     padding: 15px;
+     border-radius: 8px;
+     margin: 15px 0;
+     border-left: 4px solid #1890ff;
+ }
+ .chat-image {
+     cursor: pointer;
+     transition: transform 0.2s;
+     max-height: 100px;
+     margin: 4px;
+     border-radius: 8px;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ }
+ .chat-image:hover {
+     transform: scale(1.05);
+     box-shadow: 0 4px 8px rgba(0,0,0,0.2);
+ }
+ .modal {
+     position: fixed;
+     top: 0;
+     left: 0;
+     width: 100%;
+     height: 100%;
+     background: rgba(0,0,0,0.8);
+     display: none;
+     z-index: 1000;
+     cursor: zoom-out;
+ }
+ .modal-content {
+     position: absolute;
+     top: 50%;
+     left: 50%;
+     transform: translate(-50%, -50%);
+     max-width: 90%;
+     max-height: 90%;
+     background: white;
+     padding: 10px;
+     border-radius: 12px;
+ }
+ .modal-img {
+     width: auto;
+     height: auto;
+     max-width: 100%;
+     max-height: 100%;
+     border-radius: 8px;
+ }
+ .typing-indicator {
+     display: inline-block;
+     position: relative;
+     width: 40px;
+     height: 20px;
+ }
+ .typing-dot {
+     display: inline-block;
+     width: 6px;
+     height: 6px;
+     border-radius: 50%;
+     background-color: #fff;
+     position: absolute;
+     animation: typing 1.4s infinite ease-in-out;
+ }
+ .typing-dot:nth-child(1) {
+     left: 0;
+     animation-delay: 0s;
+ }
+ .typing-dot:nth-child(2) {
+     left: 12px;
+     animation-delay: 0.2s;
+ }
+ .typing-dot:nth-child(3) {
+     left: 24px;
+     animation-delay: 0.4s;
+ }
+ @keyframes typing {
+     0%, 60%, 100% { transform: translateY(0); }
+     30% { transform: translateY(-5px); }
+ }
+ .lesson-plan {
+     background: linear-gradient(135deg, #1a202c, #2d3748);
+     padding: 15px;
+     border-radius: 12px;
+     margin: 10px 0;
+     border-left: 4px solid #4a9df0;
+ }
+ .lesson-section {
+     margin-bottom: 15px;
+     padding-bottom: 10px;
+     border-bottom: 1px solid #4a5568;
+ }
+ .lesson-title {
+     font-size: 1.2em;
+     font-weight: bold;
+     color: #4a9df0;
+     margin-bottom: 8px;
+ }
+ .page-ref {
+     background-color: #4a5568;
+     padding: 3px 8px;
+     border-radius: 4px;
+     font-size: 0.9em;
+     display: inline-block;
+     margin: 3px;
+ }
+ """
+
+ # Create Gradio interface
+ with gr.Blocks(css=css, title="Unified EDU Assistant") as demo:
+     gr.Markdown("# 🤖 Unified EDU Assistant by Phanindra Reddy K")
+
+     # System info banner
+     gr.HTML("""
+     <div class="system-info">
+         <strong>Multi-Modal AI Assistant</strong>
+         <ul>
+             <li>Text & Voice Chat with Mistral-7B</li>
+             <li>Image Understanding with InternVL</li>
+             <li>Student Data Analysis</li>
+             <li>Visual Search with Google Images</li>
+             <li>Lesson Planning from Documents</li>
+         </ul>
+     </div>
+     """)
+
+     # Modal for image preview
+     modal_html = """
+     <div class="modal" id="imageModal" onclick="this.style.display='none'">
+         <div class="modal-content">
+             <img class="modal-img" id="expandedImg">
+         </div>
+     </div>
+     <script>
+     function showImage(url) {
+         document.getElementById('expandedImg').src = url;
+         document.getElementById('imageModal').style.display = 'block';
+     }
+     </script>
+     """
+     gr.HTML(modal_html)
+
+     chat_state = gr.State([])
+     with gr.Column(scale=2, elem_classes="chat-container"):
+         chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False,
+                              avatar_images=("user.png", "bot.png"), show_label=False)
+
+     # Mode selection
+     with gr.Row():
+         chat_mode = gr.Checkbox(label="💬 General Chat", value=True, elem_classes="mode-checkbox")
+         student_mode = gr.Checkbox(label="🎓 Student Analytics", value=False, elem_classes="mode-checkbox")
+         image_mode = gr.Checkbox(label="🖼️ Image Analysis", value=False, elem_classes="mode-checkbox")
+         lesson_mode = gr.Checkbox(label="📝 Lesson Planning", value=False, elem_classes="mode-checkbox")
+
+     # Dynamic input fields
+     with gr.Column() as chat_inputs:
+         include_images = gr.Checkbox(label="Include Visuals", value=True)
+         user_input = gr.Textbox(
+             placeholder="Type your question here...",
+             label="Your Question",
+             container=False,
+             elem_id="question-input"
+         )
+         with gr.Row():
+             max_tokens = gr.Slider(
+                 minimum=10,
+                 maximum=1000,
+                 value=100,
+                 step=10,
+                 label="Response Length (Tokens)"
+             )
+             num_images = gr.Slider(
+                 minimum=0,
+                 maximum=5,
+                 value=1,
+                 step=1,
+                 label="Number of Images",
+                 visible=True
+             )
+
+     with gr.Column(visible=False) as student_inputs:
+         file_upload = gr.File(label="CSV/Excel File", file_types=[".csv", ".xlsx"], type="filepath")
+         student_question = gr.Textbox(
+             placeholder="Ask questions about student data...",
+             label="Your Question",
+             elem_id="question-input"
+         )
+         student_status = gr.Markdown("No file loaded")
+
+     with gr.Column(visible=False) as image_inputs:
+         image_upload = gr.Image(type="pil", label="Upload Image")
+         image_url = gr.Textbox(
+             label="OR Enter Image URL",
+             placeholder="https://example.com/image.jpg",
+             elem_id="question-input"
+         )
+         image_question = gr.Textbox(
+             placeholder="Ask questions about the image...",
+             label="Your Question",
+             elem_id="question-input"
+         )
+
+     # Lesson planning section
+     with gr.Column(visible=False) as lesson_inputs:
+         gr.Markdown("### 📚 Lesson Planning")
+         doc_upload = gr.File(
+             label="Upload Curriculum Document (PDF/DOCX)",
+             file_types=[".pdf", ".docx"],
+             type="filepath"
+         )
+         doc_status = gr.Markdown("No document uploaded")
+
+         with gr.Row():
+             topic_input = gr.Textbox(
+                 label="Lesson Topic",
+                 placeholder="Enter the main topic for the lesson plan"
+             )
+             duration_input = gr.Number(
+                 label="Total Periods",
+                 value=5,
+                 minimum=1,
+                 maximum=20,
+                 step=1
+             )
+
+         additional_instructions = gr.Textbox(
+             label="Additional Requirements (optional)",
+             placeholder="Specific teaching methods, resources, or special considerations..."
+         )
+
+         generate_btn = gr.Button("Generate Lesson Plan", variant="primary")
+
+     # Common controls
+     with gr.Row():
+         submit_btn = gr.Button("Send", variant="primary")
+         mic_btn = gr.Button("Transcribe Voice", variant="secondary")
+         mic = gr.Audio(sources=["microphone"], type="numpy", label="Voice Input")
+
+     processing = gr.HTML("""
+     <div style="display: none;">
+         <div class="processing">🔮 Processing your request...</div>
+     </div>
+     """)
+
+     # Event handlers
+     def toggle_modes(chat, student, image, lesson):
+         return [
+             gr.update(visible=chat),
+             gr.update(visible=student),
+             gr.update(visible=image),
+             gr.update(visible=lesson)
+         ]
+
+     def load_student_file(file_path):
+         success, message = ai_system.load_data(file_path)
+         return message
+
+     def process_document(file_path):
+         if not file_path:
+             return "⚠️ Please select a document first"
+         success, message = ai_system.extract_text_from_document(file_path)
+         return message
+
+     def render_history(history):
+         """Render chat history with images and proper formatting"""
+         rendered = []
+         for user_msg, bot_msg, image_links in history:
+             # Apply proper styling to messages
+             user_html = f"<div class='user-msg'>{user_msg}</div>"
+
+             # Special formatting for lesson plans
+             if "Lesson Plan:" in bot_msg:
+                 bot_html = f"<div class='lesson-plan'>{bot_msg}</div>"
+             else:
+                 bot_html = f"<div class='bot-msg'>{bot_msg}</div>"
+
+             # Add images if available
+             if image_links:
+                 images_html = "".join(
+                     f"<img src='{url}' class='chat-image' onclick='showImage(\"{url}\")' />"
+                     for url in image_links
+                 )
+                 bot_html += f"<br><br><b>📸 Related Visuals:</b><br><div style='display: flex; flex-wrap: wrap;'>{images_html}</div>"
+
+             rendered.append((user_html, bot_html))
+         return rendered
+
+     def respond(message, chat_hist, chat, student, image, lesson,
+                 tokens, student_q, image_q, image_upload, image_url,
+                 include_visuals, num_imgs):
+         # If in lesson planning mode, skip this handler (respond is a
+         # generator, so yield the unchanged history instead of returning it)
+         if lesson:
+             yield render_history(chat_hist), message
+             return
+
+         # Determine the actual question based on mode
+         if chat:
+             actual_question = message
+         elif student:
+             actual_question = student_q
+         elif image:
+             actual_question = image_q
+         else:
+             actual_question = message
+
+         # Immediately show user question in chat
+         typing_html = "<div class='typing-indicator'><div class='typing-dot'></div><div class='typing-dot'></div><div class='typing-dot'></div></div>"
+         chat_hist.append((actual_question, typing_html, []))
+         yield render_history(chat_hist), ""
+
+         if chat:
+             # General chat mode
+             full_response = ""
+             for chunk in ai_system.stream_answer(message, tokens):
+                 full_response = chunk
+                 # Update with current response
+                 chat_hist[-1] = (actual_question, full_response, [])
+                 yield render_history(chat_hist), ""
+
+             # Fetch images if requested
+             image_links = []
+             if include_visuals and num_imgs > 0:
+                 image_links = ai_system.fetch_images(message, num_imgs)
+
+             # Update with final response and images
+             chat_hist[-1] = (actual_question, full_response, image_links)
+             yield render_history(chat_hist), ""
+
+         elif student:
+             # Student analytics mode
+             if ai_system.current_df is None:
+                 chat_hist[-1] = (actual_question, "⚠️ Please upload a student data file first", [])
+                 yield render_history(chat_hist), ""
+             else:
+                 response = ""
+                 for chunk in ai_system.analyze_student_data(student_q):
+                     response = chunk
+                     chat_hist[-1] = (actual_question, response, [])
+                     yield render_history(chat_hist), ""
+
+         elif image:
+             # Image analysis mode
+             if not image_upload and not image_url:
+                 chat_hist[-1] = (actual_question, "⚠️ Please upload an image or enter a URL", [])
+                 yield render_history(chat_hist), ""
+             else:
+                 try:
+                     result = ai_system.analyze_image(image_upload, image_url, image_q)
+                     chat_hist[-1] = (actual_question, result, [])
+                     yield render_history(chat_hist), ""
+                 except Exception as e:
+                     error_msg = f"❌ Error analyzing image: {str(e)}"
+                     chat_hist[-1] = (actual_question, error_msg, [])
+                     yield render_history(chat_hist), ""
+
+         # Trim history if too long
+         if len(chat_hist) > MAX_HISTORY_TURNS:
+             chat_hist = chat_hist[-MAX_HISTORY_TURNS:]
+
+         yield render_history(chat_hist), ""
+
+     def generate_lesson_plan(topic, duration, instructions, chat_hist):
+         # This handler is a generator too, so yield rather than return
+         if not topic:
+             yield render_history(chat_hist), "⚠️ Please enter a lesson topic"
+             return
+
+         # Show processing message
+         processing_msg = "<div class='typing-indicator'><div class='typing-dot'></div><div class='typing-dot'></div><div class='typing-dot'></div></div>"
+         chat_hist.append((f"Generate lesson plan for: {topic}", processing_msg, []))
+         yield render_history(chat_hist), ""
+
+         # Generate the plan
+         plan = ai_system.generate_lesson_plan(topic, duration, instructions)
+
+         # Format with proper headings
+         formatted_plan = f"""
+         <div class='lesson-plan'>
+             <div class='lesson-title'>📝 Lesson Plan: {topic} ({duration} periods)</div>
+             {plan}
+         </div>
+         """
+
+         # Update chat history with final plan
+         chat_hist[-1] = (
+             f"Generate lesson plan for: {topic}",
+             formatted_plan,
+             []
+         )
+         yield render_history(chat_hist), ""
+
+     # Mode toggles
+     chat_mode.change(fn=toggle_modes, inputs=[chat_mode, student_mode, image_mode, lesson_mode],
+                      outputs=[chat_inputs, student_inputs, image_inputs, lesson_inputs])
+     student_mode.change(fn=toggle_modes, inputs=[chat_mode, student_mode, image_mode, lesson_mode],
+                         outputs=[chat_inputs, student_inputs, image_inputs, lesson_inputs])
+     image_mode.change(fn=toggle_modes, inputs=[chat_mode, student_mode, image_mode, lesson_mode],
+                       outputs=[chat_inputs, student_inputs, image_inputs, lesson_inputs])
+     lesson_mode.change(fn=toggle_modes, inputs=[chat_mode, student_mode, image_mode, lesson_mode],
+                        outputs=[chat_inputs, student_inputs, image_inputs, lesson_inputs])
+
+     # File upload handler
+     file_upload.change(fn=load_student_file, inputs=file_upload, outputs=student_status)
+
+     # Document upload handler
+     doc_upload.change(fn=process_document, inputs=doc_upload, outputs=doc_status)
+
+     # Voice transcription
+     def transcribe_audio(audio):
+         return ai_system.transcribe(audio)
+
+     mic_btn.click(fn=transcribe_audio, inputs=mic, outputs=user_input)
+
+     # Submit handler
+     submit_btn.click(
+         fn=respond,
+         inputs=[
+             user_input, chat_state, chat_mode, student_mode, image_mode, lesson_mode,
+             max_tokens, student_question, image_question, image_upload, image_url,
+             include_images, num_images
+         ],
+         outputs=[chatbot, user_input]
+     )
+
+     # Lesson plan generation button
+     generate_btn.click(
+         fn=generate_lesson_plan,
+         inputs=[topic_input, duration_input, additional_instructions, chat_state],
+         outputs=[chatbot, topic_input]
+     )
+
+ if __name__ == "__main__":
+     demo.launch(share=True, debug=True)
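Both versions of app.py stream LLM output through openvino_genai's callback-based streamer, the pattern the removed stream_generator already used; GenerationConfig has no streaming flag, which is why the rewritten code routes all streaming through the _stream_generate relay above. A minimal standalone sketch of that callback contract, with the model path and prompt as placeholder values:

import openvino_genai

# Load the exported model and cap the response length
pipe = openvino_genai.LLMPipeline("mistral-ov", device="CPU")
config = openvino_genai.GenerationConfig(max_new_tokens=50)

def streamer(token: str):
    # Invoked synchronously for every decoded token
    print(token, end="", flush=True)
    return openvino_genai.StreamingStatus.RUNNING  # STOP would cancel generation

pipe.generate("Explain quantum physics simply", config, streamer)

Because generate() only returns once decoding finishes, any consumer that wants to yield tokens incrementally (as the Gradio handlers do) has to run it on a worker thread and hand tokens across, which is exactly what _stream_generate does.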
requirements.txt CHANGED
@@ -1,6 +1,15 @@
- gradio==4.26.0
- openvino-genai>=1.0.0
- librosa>=0.10.0
- numpy>=1.24.0
- scipy>=1.10.0
- huggingface_hub>=0.21.4
+ gradio==4.26.0
+ openvino-genai>=1.0.0
+ librosa==0.10.0
+ numpy>=1.24.0
+ scipy>=1.10.0
+ huggingface_hub>=0.21.4
+ google-api-python-client>=2.0.0
+ pandas>=2.0.0
+ requests>=2.31.0
+ Pillow>=10.0.0
+ py-cpuinfo>=9.0.0
+ openvino>=2023.2.0
+ PyPDF2>=3.0.0
+ python-docx>=1.1.0
+ soundfile>=0.12.0
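A quick way to confirm the expanded dependency list resolves in a fresh environment is to import each package by its module name; the pip-name-to-import-name mapping below (google-api-python-client -> googleapiclient, python-docx -> docx, py-cpuinfo -> cpuinfo, Pillow -> PIL) is the only assumption:

# Smoke test: run after `pip install -r requirements.txt`
for module in ("gradio", "openvino_genai", "librosa", "numpy", "scipy",
               "huggingface_hub", "googleapiclient", "pandas", "requests",
               "PIL", "cpuinfo", "openvino", "PyPDF2", "docx", "soundfile"):
    __import__(module)
print("all requirements import OK")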