phani50101 committed on
Commit
88bd3ae
·
1 Parent(s): 4ad1a5e

Add application file

Browse files
Files changed (2) hide show
  1. app.py +495 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from huggingface_hub import snapshot_download
3
+ import gradio as gr
4
+ import openvino_genai
5
+ import librosa
6
+ import numpy as np
7
+ from threading import Lock, Event
8
+ from scipy.ndimage import uniform_filter1d
9
+ from queue import Queue, Empty
10
+ from googleapiclient.discovery import build
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
+ import time
13
+ import cpuinfo
14
+ import gc
15
+ import os
16
+
17
# Thread settings for the OpenMP runtime.
# The thread count is capped at what the host reports (upper bound 8, the
# previously hard-coded value) so that small machines are not oversubscribed
# and the affinity range never names cores that do not exist.
# NOTE(review): these variables are set after the inference libraries have
# already been imported — confirm the runtime still honours them here.
_omp_threads = min(8, os.cpu_count() or 1)
os.environ["GOMP_CPU_AFFINITY"] = (
    "0" if _omp_threads == 1 else f"0-{_omp_threads - 1}"
)
os.environ["OMP_NUM_THREADS"] = str(_omp_threads)

# Configuration constants
# The Google API key is a secret and must not live in source control; it is
# read from the environment. With no key configured the image search request
# fails and the app simply shows no images.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
# The search-engine id is only an identifier; keep the previous value as the
# default so existing deployments keep working, but allow an override.
GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID", "3027bedf3c88a4efb")
DEFAULT_MAX_TOKENS = 100   # initial value of the response-length slider
DEFAULT_NUM_IMAGES = 1     # initial value of the image-count slider
MAX_HISTORY_TURNS = 2      # number of chat turns kept in the history
MAX_TOKENS_LIMIT = 1000    # upper bound of the response-length slider
28
+
29
# Fetch the LLM and speech-to-text model snapshots into local directories
# and report how long the two downloads took together.
start_time = time.time()
for _repo_id, _local_dir in (
    ("OpenVINO/mistral-7b-instruct-v0.1-int8-ov", "mistral-ov"),
    ("OpenVINO/whisper-tiny-fp16-ov", "whisper-ov-model"),
):
    snapshot_download(repo_id=_repo_id, local_dir=_local_dir)
_download_seconds = time.time() - start_time
print(f"Model download time: {_download_seconds:.2f} seconds")
34
+
35
# CPU-specific configuration
# The CPU report lists individual feature flags (for example "avx512f" or
# "avx512bw"), so an exact membership test for "avx512" can never match.
# Look for any flag carrying that prefix instead. `.get` avoids a KeyError
# when the report contains no "flags" entry at all.
cpu_features = cpuinfo.get_cpu_info().get('flags', [])
config_options = {}
if any(flag.startswith('avx512') for flag in cpu_features):
    # Request bf16 through the same precision hint the AVX2 branch uses.
    # NOTE(review): this replaces the legacy "ENFORCE_BF16" key, which is
    # presumably not accepted by current OpenVINO releases — confirm against
    # the installed version, and confirm bf16 is wanted on AVX512 CPUs
    # without native bf16 instructions.
    config_options["INFERENCE_PRECISION_HINT"] = "bf16"
    print("Using AVX512 optimizations")
elif 'avx2' in cpu_features:
    config_options["INFERENCE_PRECISION_HINT"] = "f32"
    print("Using AVX2 optimizations")
44
+
45
# Build both inference pipelines on the CPU. The LLM receives the throughput
# hint merged with whatever precision options `config_options` holds.
start_time = time.time()
_llm_properties = {"PERFORMANCE_HINT": "THROUGHPUT"}
_llm_properties.update(config_options)
mistral_pipe = openvino_genai.LLMPipeline(
    "mistral-ov",
    device="CPU",
    config=_llm_properties,
)
whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")

# Serialises access to the pipelines while they are generating.
pipe_lock = Lock()
print(f"Model initialization time: {time.time() - start_time:.2f} seconds")

# Run one short generation through each pipeline so the first real request
# does not pay the first-inference cost.
print("Warming up models...")
start_time = time.time()
with pipe_lock:
    _warmup_config = openvino_genai.GenerationConfig(max_new_tokens=10)
    mistral_pipe.generate("Warmup", _warmup_config)
    _silent_clip = np.zeros(16000, dtype=np.float32)
    whisper_pipe.generate(_silent_clip)
print(f"Model warmup time: {time.time() - start_time:.2f} seconds")

# Worker pools: one for text generation jobs, one for image lookups.
generation_executor = ThreadPoolExecutor(max_workers=4)
image_executor = ThreadPoolExecutor(max_workers=8)
74
+
75
def fetch_images(query: str, num: int = DEFAULT_NUM_IMAGES) -> list:
    """Return up to *num* image URLs for *query* from Google Custom Search.

    A single request asks for all results at once. Issuing *num* identical
    one-result requests (as was done before) returns the same top hit *num*
    times and multiplies the API quota cost.

    Args:
        query: Search text.
        num: Number of image links wanted. Values <= 0 yield an empty list;
            values above 10 are clamped because the API returns at most 10
            results per request.

    Returns:
        A list of image URLs (possibly shorter than *num*, possibly empty).
        Any failure is logged and reported as an empty list so the chat
        flow is never interrupted by the image lookup.
    """
    start_time = time.time()

    num = int(num)
    if num <= 0 or not query or not query.strip():
        return []

    try:
        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
        response = service.cse().list(
            q=query,
            cx=GOOGLE_CSE_ID,
            searchType="image",
            num=min(num, 10),
        ).execute()
    except Exception as e:  # best effort: network/API errors must not break chat
        print(f"Error in image fetching: {e}")
        return []

    image_links = [
        item["link"] for item in response.get("items", []) if "link" in item
    ]
    print(f"Image fetch time: {time.time() - start_time:.2f} seconds")
    return image_links
106
+
107
def process_audio(data, sr):
    """Normalise a recording and trim the silence around the speech.

    Args:
        data: Audio samples as a NumPy array; arrays with more than one
            dimension are down-mixed to mono first.
        sr: Sample rate of *data* in Hz.

    Returns:
        A float32 array containing the detected speech with 0.1 s of padding
        on each side, or ``None`` when the input is empty, completely silent,
        or contains no frame above the energy threshold.
    """
    start_time = time.time()
    data = librosa.to_mono(data.T) if data.ndim > 1 else data
    data = data.astype(np.float32)

    # Guard the peak normalisation: an empty array makes np.max raise and an
    # all-zero recording would divide by zero and fill the array with NaN.
    # `not peak > 0` is also true for a NaN peak.
    peak = np.max(np.abs(data)) if data.size else 0.0
    if not peak > 0:
        print(f"Audio processing time: {time.time() - start_time:.2f} seconds")
        return None
    data /= peak

    # Frame-level energy, lightly smoothed, then thresholded to find speech.
    rms = librosa.feature.rms(y=data, frame_length=2048, hop_length=512)[0]
    smoothed_rms = uniform_filter1d(rms, size=5)
    speech_frames = np.where(smoothed_rms > 0.025)[0]
    if not speech_frames.size:
        print(f"Audio processing time: {time.time() - start_time:.2f} seconds")
        return None

    # Convert frame indices back to sample offsets (hop of 512 samples) and
    # keep 0.1 s of context before and after the speech.
    start = max(0, int(speech_frames[0] * 512 - 0.1 * sr))
    end = min(len(data), int((speech_frames[-1] + 1) * 512 + 0.1 * sr))
    print(f"Audio processing time: {time.time() - start_time:.2f} seconds")
    return data[start:end]
122
+
123
def transcribe(audio):
    """Turn a microphone recording into text with the Whisper pipeline.

    Args:
        audio: ``None`` or a ``(sample_rate, samples)`` pair.

    Returns:
        An empty string when there is no audio, no detectable speech, or the
        speech is shorter than 0.1 s; otherwise whatever the Whisper
        pipeline's ``generate`` call returns for the 16 kHz signal.
    """
    start_time = time.time()
    if audio is None:
        print(f"Transcription time: {time.time() - start_time:.2f} seconds")
        return ""

    sr, data = audio
    processed = process_audio(data, sr)

    # The clip is still at its original sample rate here, so the minimum
    # length has to scale with `sr`; a fixed 1600 samples is 0.1 s only at
    # 16 kHz and far less at typical microphone rates.
    min_samples = int(0.1 * sr)
    if processed is None or len(processed) < min_samples:
        print(f"Transcription time: {time.time() - start_time:.2f} seconds")
        return ""

    if sr != 16000:
        processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)
    result = whisper_pipe.generate(processed)
    print(f"Transcription time: {time.time() - start_time:.2f} seconds")
    return result
138
+
139
def stream_answer(message: str, max_tokens: int, include_images: bool):
    """Generate a reply to *message* and stream it as it is produced.

    Generation runs on ``generation_executor``; the streaming callback pushes
    each text chunk onto a queue that this generator drains.

    Args:
        message: Prompt passed to the LLM pipeline.
        max_tokens: Upper bound on newly generated tokens.
        include_images: Unused here; kept for interface compatibility.

    Yields:
        The complete response text accumulated so far, once per received
        chunk and once more at the end. If generation raised, the last value
        yielded is ``"Error: <message>"`` instead.
    """
    start_time = time.time()
    response_queue = Queue()
    completion_event = Event()
    error = [None]  # one-slot box so the worker thread can report a failure

    # NOTE(review): `streaming` and `streaming_interval` are passed through
    # unchanged — confirm the installed GenerationConfig accepts them.
    optimized_config = openvino_genai.GenerationConfig(
        max_new_tokens=max_tokens,
        num_beams=1,
        do_sample=False,
        temperature=1.0,
        top_p=0.9,
        top_k=30,
        streaming=True,
        streaming_interval=5
    )

    def callback(tokens):
        response_queue.put("".join(tokens))
        return openvino_genai.StreamingStatus.RUNNING

    def generate():
        try:
            with pipe_lock:
                mistral_pipe.generate(message, optimized_config, callback)
        except Exception as e:
            error[0] = str(e)
        finally:
            completion_event.set()

    generation_executor.submit(generate)

    accumulated = []
    token_count = 0
    last_gc = time.time()

    while not completion_event.is_set() or not response_queue.empty():
        try:
            # Block briefly instead of spinning on get_nowait(): a busy loop
            # burns a core and competes with the generation thread.
            token_batch = response_queue.get(timeout=0.05)
        except Empty:
            continue

        accumulated.append(token_batch)
        # Count chunks, not characters.
        # NOTE(review): presumably about one chunk per token — confirm.
        token_count += 1

        # Periodic garbage collection
        if time.time() - last_gc > 2.0:
            gc.collect()
            last_gc = time.time()

        yield "".join(accumulated)

    # Check for failure only after the loop. The worker sets the error before
    # the completion event, so a failure that happens before the first loop
    # iteration would otherwise be lost and an empty answer returned.
    if error[0]:
        yield f"Error: {error[0]}"
        print(f"Stream answer time: {time.time() - start_time:.2f} seconds")
        return

    elapsed = max(time.time() - start_time, 1e-9)
    print(f"Generated {token_count} tokens in {elapsed:.2f} seconds "
          f"({token_count / elapsed:.2f} tokens/sec)")
    yield "".join(accumulated)
198
+
199
def run_chat(message: str, history: list, include_images: bool, max_tokens: int, num_images: int):
    """Handle one chat turn: stream the answer, then attach related images.

    Args:
        message: The user's question.
        history: Per-session list of ``(user_msg, bot_msg, image_links)``
            tuples. It is updated in place because the session state is not
            one of this handler's outputs.
        include_images: Whether to look up images after the answer is done.
        max_tokens: Upper bound on generated tokens.
        num_images: Number of images to request.

    Yields:
        ``(rendered_history, textbox_update)`` pairs. The textbox stays
        cleared and disabled while streaming and is re-enabled by the final
        pair.
    """
    start_time = time.time()

    # A blank submission has nothing to answer; leave the conversation as is.
    if not message or not message.strip():
        yield render_history(history), gr.update(value="", interactive=True)
        return

    # Placeholder entry that the streamed text fills in.
    history.append((message, "", []))
    yield render_history(history), gr.update(value="", interactive=False)

    final_text = ""
    for output in stream_answer(message, max_tokens, include_images):
        final_text = output
        history[-1] = (message, final_text, [])
        yield render_history(history), gr.update(value="", interactive=False)

    images = fetch_images(message, int(num_images)) if include_images else []
    history[-1] = (message, final_text, images)

    # Trim in place. Rebinding a local name would leave the shared list
    # growing without bound, and the streaming frames (full list) would
    # disagree with the final frame (last turns only).
    if len(history) > MAX_HISTORY_TURNS:
        del history[:-MAX_HISTORY_TURNS]

    print(f"Total chat time: {time.time() - start_time:.2f} seconds")
    yield render_history(history), gr.update(value="", interactive=True)
228
+
229
def render_history(history):
    """Convert stored chat turns into ``(user, bot)`` pairs for the chatbot.

    Args:
        history: Iterable of ``(user_msg, bot_msg, image_links)`` tuples.

    Returns:
        A list of ``(user_msg, text)`` tuples. When a turn has image links,
        *text* is the bot message followed by an HTML gallery of thumbnails.
    """
    from html import escape  # local import: only this function needs it

    rendered = []
    for user_msg, bot_msg, image_links in history:
        text = bot_msg
        if image_links:
            # URLs come from an external search service, so escape them
            # before placing them in an attribute. The click handler reads
            # the URL back from the element rather than embedding it in a
            # JavaScript string, where a quote would break the markup.
            images_html = "".join(
                f"<img src='{escape(str(url), quote=True)}' class='chat-image' "
                f"onclick='showImage(this.src)' />"
                for url in image_links
            )
            text += f"<br><br><b>📸 Related Visuals:</b><br><div style='display: flex; flex-wrap: wrap;'>{images_html}</div>"
        rendered.append((user_msg, text))

    return rendered
243
+
244
# Client-side helper that opens the image modal.
# NOTE(review): script tags placed inside gr.HTML content are presumably not
# executed by the browser, so the function is registered through the page
# <head> instead — confirm against the Gradio version in use.
_MODAL_SCRIPT = """
<script>
function showImage(url) {
    document.getElementById('expandedImg').src = url;
    document.getElementById('imageModal').style.display = 'block';
}
</script>
"""

with gr.Blocks(head=_MODAL_SCRIPT, css="""
    .processing {
        animation: pulse 1.5s infinite;
        color: #4a5568;
        padding: 10px;
        border-radius: 5px;
        text-align: center;
        margin: 10px 0;
    }
    @keyframes pulse {
        0%, 100% { opacity: 1; }
        50% { opacity: 0.5; }
    }
    .chat-image {
        cursor: pointer;
        transition: transform 0.2s;
        max-height: 100px;
        margin: 4px;
        border-radius: 8px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .chat-image:hover {
        transform: scale(1.05);
        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
    }
    .modal {
        position: fixed;
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        background: rgba(0,0,0,0.8);
        display: none;
        z-index: 1000;
        cursor: zoom-out;
    }
    .modal-content {
        position: absolute;
        top: 50%;
        left: 50%;
        transform: translate(-50%, -50%);
        max-width: 90%;
        max-height: 90%;
        background: white;
        padding: 10px;
        border-radius: 12px;
    }
    .modal-img {
        width: auto;
        height: auto;
        max-width: 100%;
        max-height: 100%;
        border-radius: 8px;
    }
    .chat-container {
        border: 1px solid #e5e7eb;
        border-radius: 12px;
        padding: 20px;
        margin-bottom: 20px;
    }
    .slider-container {
        margin-top: 20px;
        padding: 15px;
        border-radius: 10px;
        background-color: #f8f9fa;
    }
    .slider-label {
        font-weight: bold;
        margin-bottom: 5px;
    }
    .system-info {
        background-color: #7B9BDB;
        padding: 15px;
        border-radius: 8px;
        margin: 15px 0;
        border-left: 4px solid #1890ff;
    }
    .typing-indicator {
        display: inline-block;
        position: relative;
        width: 40px;
        height: 20px;
    }
    .typing-dot {
        display: inline-block;
        width: 6px;
        height: 6px;
        border-radius: 50%;
        background-color: #4a5568;
        position: absolute;
        animation: typing 1.4s infinite ease-in-out;
    }
    .typing-dot:nth-child(1) {
        left: 0;
        animation-delay: 0s;
    }
    .typing-dot:nth-child(2) {
        left: 12px;
        animation-delay: 0.2s;
    }
    .typing-dot:nth-child(3) {
        left: 24px;
        animation-delay: 0.4s;
    }
    @keyframes typing {
        0%, 60%, 100% { transform: translateY(0); }
        30% { transform: translateY(-5px); }
    }
""") as demo:
    gr.Markdown("# 🤖 EDU CHAT BY PHANINDRA REDDY K")

    # System info banner
    gr.HTML("""
    <div class="system-info">
        <strong>Performance Optimized for High-RAM Systems</strong>
        <ul>
            <li>Adaptive resource allocation based on request type</li>
        </ul>
    </div>
    """)

    # Full-screen overlay used to show a clicked thumbnail at full size;
    # clicking the overlay hides it again.
    modal_html = """
    <div class="modal" id="imageModal" onclick="this.style.display='none'">
        <div class="modal-content">
            <img class="modal-img" id="expandedImg">
        </div>
    </div>
    """
    gr.HTML(modal_html)

    # Per-session chat history: list of (user_msg, bot_msg, image_links).
    state = gr.State([])

    with gr.Column(scale=2, elem_classes="chat-container"):
        chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False)

    with gr.Column(scale=1):
        gr.Markdown("### 💬 Ask Your Question")

        with gr.Row():
            user_input = gr.Textbox(
                placeholder="Type your question here...",
                label="",
                container=False,
                elem_id="question-input"
            )
            include_images = gr.Checkbox(
                label="Include Visuals",
                value=True,
                container=False,
                elem_id="image-checkbox"
            )

        # Generation settings
        with gr.Column(elem_classes="slider-container"):
            gr.Markdown("### ⚙️ Generation Settings")

            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=10,
                    maximum=MAX_TOKENS_LIMIT,
                    value=DEFAULT_MAX_TOKENS,
                    step=10,
                    label="Response Length (Tokens)",
                    info=f"Max: {MAX_TOKENS_LIMIT} tokens (for detailed explanations)",
                    elem_classes="slider-label"
                )

            # Row is shown only while "Include Visuals" is ticked.
            with gr.Row(visible=True) as image_slider_row:
                num_images = gr.Slider(
                    minimum=0,
                    maximum=5,
                    value=DEFAULT_NUM_IMAGES,
                    step=1,
                    label="Number of Images",
                    info="Set to 0 to disable images",
                    elem_classes="slider-label"
                )

        with gr.Row():
            submit_btn = gr.Button("Send Text", variant="primary")
            mic_btn = gr.Button("Transcribe Voice", variant="secondary")
            mic = gr.Audio(
                sources=["microphone"],
                type="numpy",
                label="Voice Input",
                show_label=False,
                elem_id="voice-input"
            )

        # The handlers below toggle this component's `visible` flag, so the
        # component itself starts hidden. An inline `display: none` on the
        # inner markup would keep the banner invisible even when toggled on.
        processing = gr.HTML("""
        <div id="processing">
            <div class="processing">🔮 Processing your request...</div>
        </div>
        """, visible=False)

    def toggle_image_slider(include_visuals):
        """Show the image-count slider only when visuals are requested."""
        return gr.update(visible=include_visuals)

    include_images.change(
        fn=toggle_image_slider,
        inputs=include_images,
        outputs=image_slider_row
    )

    def toggle_processing():
        """Show the processing banner and disable the triggering button."""
        return gr.update(visible=True), gr.update(interactive=False)

    def hide_processing():
        """Hide the processing banner and re-enable the triggering button."""
        return gr.update(visible=False), gr.update(interactive=True)

    # Text flow: show banner -> stream the chat turn -> hide banner.
    submit_btn.click(
        fn=toggle_processing,
        outputs=[processing, submit_btn]
    ).then(
        fn=run_chat,
        inputs=[user_input, state, include_images, max_tokens, num_images],
        outputs=[chatbot, user_input]
    ).then(
        fn=hide_processing,
        outputs=[processing, submit_btn]
    )

    # Voice flow: show banner -> transcribe into the textbox -> hide banner.
    mic_btn.click(
        fn=toggle_processing,
        outputs=[processing, mic_btn]
    ).then(
        fn=transcribe,
        inputs=mic,
        outputs=user_input
    ).then(
        fn=hide_processing,
        outputs=[processing, mic_btn]
    )

if __name__ == "__main__":
    demo.launch(share=True, debug=True)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.26.0
2
+ openvino-genai>=1.0.0
3
+ librosa>=0.10.0
4
+ numpy>=1.24.0
5
+ scipy>=1.10.0
6
+ huggingface_hub>=0.21.4
7
+ google-api-python-client
8
+
9
+ py-cpuinfo>=8.0.0