Spaces:

phani50101
/

EDU_CHAT_BY_PHANI

Running

App Files Files Community

phani50101 committed on 5 days ago

Commit

88bd3ae

1 Parent(s): 4ad1a5e

Add application file

Browse files

Files changed (2) hide show

app.py +495 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,495 @@

+from huggingface_hub import snapshot_download
+import gradio as gr
+import openvino_genai
+import librosa
+import numpy as np
+from threading import Lock, Event
+from scipy.ndimage import uniform_filter1d
+from queue import Queue, Empty
+from googleapiclient.discovery import build
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import time
+import cpuinfo
+import gc
+import os
+# Set CPU affinity for optimization
+os.environ["GOMP_CPU_AFFINITY"] = "0-7"  # Use first 8 CPU cores
+os.environ["OMP_NUM_THREADS"] = "8"
+# Configuration constants
+GOOGLE_API_KEY = "AIzaSyAo-1iW5MEZbc53DlEldtnUnDaYuTHUDH4"
+GOOGLE_CSE_ID = "3027bedf3c88a4efb"
+DEFAULT_MAX_TOKENS = 100
+DEFAULT_NUM_IMAGES = 1
+MAX_HISTORY_TURNS = 2
+MAX_TOKENS_LIMIT = 1000
+# Download models
+start_time = time.time()
+snapshot_download(repo_id="OpenVINO/mistral-7b-instruct-v0.1-int8-ov", local_dir="mistral-ov")
+snapshot_download(repo_id="OpenVINO/whisper-tiny-fp16-ov", local_dir="whisper-ov-model")
+print(f"Model download time: {time.time() - start_time:.2f} seconds")
+# CPU-specific configuration
+cpu_features = cpuinfo.get_cpu_info()['flags']
+config_options = {}
+if 'avx512' in cpu_features:
+    config_options["ENFORCE_BF16"] = "YES"
+    print("Using AVX512 optimizations")
+elif 'avx2' in cpu_features:
+    config_options["INFERENCE_PRECISION_HINT"] = "f32"
+    print("Using AVX2 optimizations")
+# Initialize models with performance flags
+start_time = time.time()
+mistral_pipe = openvino_genai.LLMPipeline(
+    "mistral-ov",
+    device="CPU",
+    config={
+        "PERFORMANCE_HINT": "THROUGHPUT",
+        **config_options
+    }
+)
+whisper_pipe = openvino_genai.WhisperPipeline(
+    "whisper-ov-model",
+    device="CPU"
+)
+pipe_lock = Lock()
+print(f"Model initialization time: {time.time() - start_time:.2f} seconds")
+# Warm up models
+print("Warming up models...")
+start_time = time.time()
+with pipe_lock:
+    mistral_pipe.generate("Warmup", openvino_genai.GenerationConfig(max_new_tokens=10))
+    whisper_pipe.generate(np.zeros(16000, dtype=np.float32))
+print(f"Model warmup time: {time.time() - start_time:.2f} seconds")
+# Thread pools
+# generation_executor runs the LLM generation job submitted by stream_answer.
+# That job takes pipe_lock, so generations still execute one at a time even
+# though the pool has several workers.
+generation_executor = ThreadPoolExecutor(max_workers=4)  # Increased workers
+# Separate, larger pool intended for image-search requests.
+image_executor = ThreadPoolExecutor(max_workers=8)
+def fetch_images(query: str, num: int = DEFAULT_NUM_IMAGES) -> list:
+    """Fetch images in parallel using ThreadPoolExecutor"""
+    start_time = time.time()
+    if num <= 0:
+        return []
+    try:
+        futures = []
+        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
+        for _ in range(num):
+            future = image_executor.submit(
+                service.cse().list(q=query, cx=GOOGLE_CSE_ID, searchType="image", num=1).execute
+            )
+            futures.append(future)
+        image_links = []
+        for future in as_completed(futures):
+            try:
+                res = future.result()
+                if "items" in res and res["items"]:
+                    image_links.append(res["items"][0]["link"])
+            except Exception as e:
+                print(f"Image fetch error: {e}")
+        print(f"Parallel image fetch time: {time.time() - start_time:.2f} seconds")
+        return image_links
+    except Exception as e:
+        print(f"Error in image fetching: {e}")
+        return []
+def process_audio(data, sr):
+    start_time = time.time()
+    data = librosa.to_mono(data.T) if data.ndim > 1 else data
+    data = data.astype(np.float32)
+    data /= np.max(np.abs(data))
+    rms = librosa.feature.rms(y=data, frame_length=2048, hop_length=512)[0]
+    smoothed_rms = uniform_filter1d(rms, size=5)
+    speech_frames = np.where(smoothed_rms > 0.025)[0]
+    if not speech_frames.size:
+        print(f"Audio processing time: {time.time() - start_time:.2f} seconds")
+        return None
+    start = max(0, int(speech_frames[0] * 512 - 0.1 * sr))
+    end = min(len(data), int((speech_frames[-1] + 1) * 512 + 0.1 * sr))
+    print(f"Audio processing time: {time.time() - start_time:.2f} seconds")
+    return data[start:end]
+def transcribe(audio):
+    start_time = time.time()
+    if audio is None:
+        print(f"Transcription time: {time.time() - start_time:.2f} seconds")
+        return ""
+    sr, data = audio
+    processed = process_audio(data, sr)
+    if processed is None or len(processed) < 1600:
+        print(f"Transcription time: {time.time() - start_time:.2f} seconds")
+        return ""
+    if sr != 16000:
+        processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)
+    result = whisper_pipe.generate(processed)
+    print(f"Transcription time: {time.time() - start_time:.2f} seconds")
+    return result
+def stream_answer(message: str, max_tokens: int, include_images: bool) -> str:
+    start_time = time.time()
+    response_queue = Queue()
+    completion_event = Event()
+    error = [None]
+    optimized_config = openvino_genai.GenerationConfig(
+        max_new_tokens=max_tokens,
+        num_beams=1,
+        do_sample=False,
+        temperature=1.0,
+        top_p=0.9,
+        top_k=30,
+        streaming=True,
+        streaming_interval=5  # Batch tokens in groups of 5
+    )
+    def callback(tokens):  # Now accepts multiple tokens
+        response_queue.put("".join(tokens))
+        return openvino_genai.StreamingStatus.RUNNING
+    def generate():
+        try:
+            with pipe_lock:
+                mistral_pipe.generate(message, optimized_config, callback)
+        except Exception as e:
+            error[0] = str(e)
+        finally:
+            completion_event.set()
+    generation_executor.submit(generate)
+    accumulated = []
+    token_count = 0
+    last_gc = time.time()
+    while not completion_event.is_set() or not response_queue.empty():
+        if error[0]:
+            yield f"Error: {error[0]}"
+            print(f"Stream answer time: {time.time() - start_time:.2f} seconds")
+            return
+        try:
+            token_batch = response_queue.get_nowait()
+            accumulated.append(token_batch)
+            token_count += len(token_batch)
+            # Periodic garbage collection
+            if time.time() - last_gc > 2.0:  # Every 2 seconds
+                gc.collect()
+                last_gc = time.time()
+            yield "".join(accumulated)
+        except Empty:
+            continue
+    print(f"Generated {token_count} tokens in {time.time() - start_time:.2f} seconds "
+          f"({token_count/(time.time() - start_time):.2f} tokens/sec)")
+    yield "".join(accumulated)
+def run_chat(message: str, history: list, include_images: bool, max_tokens: int, num_images: int):
+    start_time = time.time()
+    final_text = ""
+    # Create a placeholder for the streaming response
+    history.append((message, "", []))
+    rendered_history = render_history(history)
+    yield rendered_history, gr.update(value="", interactive=False)
+    # Stream tokens and update chatbot in real-time
+    for output in stream_answer(message, max_tokens, include_images):
+        final_text = output
+        # Update only the last response in history
+        updated_history = history[:-1] + [(message, final_text, [])]
+        rendered_history = render_history(updated_history)
+        yield rendered_history, gr.update(value="", interactive=False)
+    images = []
+    if include_images:
+        images = fetch_images(message, num_images)
+    # Update history with final response and images
+    history[-1] = (message, final_text, images)
+    if len(history) > MAX_HISTORY_TURNS:
+        history = history[-MAX_HISTORY_TURNS:]
+    rendered_history = render_history(history)
+    print(f"Total chat time: {time.time() - start_time:.2f} seconds")
+    yield rendered_history, gr.update(value="", interactive=True)
+def render_history(history):
+    start_time = time.time()
+    rendered = []
+    for user_msg, bot_msg, image_links in history:
+        text = bot_msg
+        if image_links:
+            images_html = "".join(
+                f"<img src='{url}' class='chat-image' onclick='showImage(\"{url}\")' />"
+                for url in image_links
+            )
+            text += f"<br><br><b>📸 Related Visuals:</b><br><div style='display: flex; flex-wrap: wrap;'>{images_html}</div>"
+        rendered.append((user_msg, text))
+    return rendered
+with gr.Blocks(css="""
+    .processing {
+        animation: pulse 1.5s infinite;
+        color: #4a5568;
+        padding: 10px;
+        border-radius: 5px;
+        text-align: center;
+        margin: 10px 0;
+    }
+    @keyframes pulse {
+        0%, 100% { opacity: 1; }
+        50% { opacity: 0.5; }
+    }
+    .chat-image {
+        cursor: pointer;
+        transition: transform 0.2s;
+        max-height: 100px;
+        margin: 4px;
+        border-radius: 8px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    }
+    .chat-image:hover {
+        transform: scale(1.05);
+        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
+    }
+    .modal {
+        position: fixed;
+        top: 0;
+        left: 0;
+        width: 100%;
+        height: 100%;
+        background: rgba(0,0,0,0.8);
+        display: none;
+        z-index: 1000;
+        cursor: zoom-out;
+    }
+    .modal-content {
+        position: absolute;
+        top: 50%;
+        left: 50%;
+        transform: translate(-50%, -50%);
+        max-width: 90%;
+        max-height: 90%;
+        background: white;
+        padding: 10px;
+        border-radius: 12px;
+    }
+    .modal-img {
+        width: auto;
+        height: auto;
+        max-width: 100%;
+        max-height: 100%;
+        border-radius: 8px;
+    }
+    .chat-container {
+        border: 1px solid #e5e7eb;
+        border-radius: 12px;
+        padding: 20px;
+        margin-bottom: 20px;
+    }
+    .slider-container {
+        margin-top: 20px;
+        padding: 15px;
+        border-radius: 10px;
+        background-color: #f8f9fa;
+    }
+    .slider-label {
+        font-weight: bold;
+        margin-bottom: 5px;
+    }
+    .system-info {
+        background-color: #7B9BDB;
+        padding: 15px;
+        border-radius: 8px;
+        margin: 15px 0;
+        border-left: 4px solid #1890ff;
+    }
+    .typing-indicator {
+        display: inline-block;
+        position: relative;
+        width: 40px;
+        height: 20px;
+    }
+    .typing-dot {
+        display: inline-block;
+        width: 6px;
+        height: 6px;
+        border-radius: 50%;
+        background-color: #4a5568;
+        position: absolute;
+        animation: typing 1.4s infinite ease-in-out;
+    }
+    .typing-dot:nth-child(1) {
+        left: 0;
+        animation-delay: 0s;
+    }
+    .typing-dot:nth-child(2) {
+        left: 12px;
+        animation-delay: 0.2s;
+    }
+    .typing-dot:nth-child(3) {
+        left: 24px;
+        animation-delay: 0.4s;
+    }
+    @keyframes typing {
+        0%, 60%, 100% { transform: translateY(0); }
+        30% { transform: translateY(-5px); }
+    }
+""") as demo:
+    gr.Markdown("# 🤖 EDU CHAT BY PHANINDRA REDDY K")
+    # System info banner
+    gr.HTML("""
+    <div class="system-info">
+        <strong>Performance Optimized for High-RAM Systems</strong>
+        <ul>
+            <li>Adaptive resource allocation based on request type</li>
+        </ul>
+    </div>
+    """)
+    modal_html = """
+    <div class="modal" id="imageModal" onclick="this.style.display='none'">
+        <div class="modal-content">
+            <img class="modal-img" id="expandedImg">
+        </div>
+    </div>
+    <script>
+    function showImage(url) {
+        document.getElementById('expandedImg').src = url;
+        document.getElementById('imageModal').style.display = 'block';
+    }
+    </script>
+    """
+    gr.HTML(modal_html)
+    state = gr.State([])
+    with gr.Column(scale=2, elem_classes="chat-container"):
+        chatbot = gr.Chatbot(label="Conversation", height=500, bubble_full_width=False)
+    with gr.Column(scale=1):
+        gr.Markdown("### 💬 Ask Your Question")
+        with gr.Row():
+            user_input = gr.Textbox(
+                placeholder="Type your question here...",
+                label="",
+                container=False,
+                elem_id="question-input"
+            )
+            include_images = gr.Checkbox(
+                label="Include Visuals",
+                value=True,
+                container=False,
+                elem_id="image-checkbox"
+            )
+        # Add the sliders container
+        with gr.Column(elem_classes="slider-container"):
+            gr.Markdown("### ⚙️ Generation Settings")
+            with gr.Row():
+                max_tokens = gr.Slider(
+                    minimum=10,
+                    maximum=MAX_TOKENS_LIMIT,  # Increased to 1000
+                    value=DEFAULT_MAX_TOKENS,
+                    step=10,
+                    label="Response Length (Tokens)",
+                    info=f"Max: {MAX_TOKENS_LIMIT} tokens (for detailed explanations)",
+                    elem_classes="slider-label"
+                )
+            # Conditionally visible image slider row
+            with gr.Row(visible=True) as image_slider_row:
+                num_images = gr.Slider(
+                    minimum=0,
+                    maximum=5,
+                    value=DEFAULT_NUM_IMAGES,
+                    step=1,
+                    label="Number of Images",
+                    info="Set to 0 to disable images",
+                    elem_classes="slider-label"
+                )
+        with gr.Row():
+            submit_btn = gr.Button("Send Text", variant="primary")
+            mic_btn = gr.Button("Transcribe Voice", variant="secondary")
+            mic = gr.Audio(
+                sources=["microphone"],
+                type="numpy",
+                label="Voice Input",
+                show_label=False,
+                elem_id="voice-input"
+            )
+        processing = gr.HTML("""
+            <div id="processing" style="display: none;">
+                <div class="processing">🔮 Processing your request...</div>
+            </div>
+        """)
+    # Toggle image slider visibility based on checkbox
+    def toggle_image_slider(include_visuals):
+        return gr.update(visible=include_visuals)
+    include_images.change(
+        fn=toggle_image_slider,
+        inputs=include_images,
+        outputs=image_slider_row
+    )
+    def toggle_processing():
+        return gr.update(visible=True), gr.update(interactive=False)
+    def hide_processing():
+        return gr.update(visible=False), gr.update(interactive=True)
+    # Update the submit_btn click handler to include streaming
+    submit_btn.click(
+        fn=toggle_processing,
+        outputs=[processing, submit_btn]
+    ).then(
+        fn=lambda: (gr.update(visible=True), gr.update(interactive=False)),
+        outputs=[processing, submit_btn]
+    ).then(
+        fn=run_chat,
+        inputs=[user_input, state, include_images, max_tokens, num_images],
+        outputs=[chatbot, user_input]
+    ).then(
+        fn=lambda: (gr.update(visible=False), gr.update(interactive=True)),
+        outputs=[processing, submit_btn]
+    )
+    # Voice transcription remains the same
+    mic_btn.click(
+        fn=toggle_processing,
+        outputs=[processing, mic_btn]
+    ).then(
+        fn=transcribe,
+        inputs=mic,
+        outputs=user_input
+    ).then(
+        fn=hide_processing,
+        outputs=[processing, mic_btn]
+    )
+# Start the web server only when this file is run as a script, not when it is
+# imported.
+if __name__ == "__main__":
+    # NOTE(review): share=True asks for a public link and debug=True turns on
+    # debug output; confirm both are wanted for the hosted deployment.
+    demo.launch(share=True, debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+gradio==4.26.0
+openvino-genai>=1.0.0
+librosa>=0.10.0
+numpy>=1.24.0
+scipy>=1.10.0
+huggingface_hub>=0.21.4
+google-api-python-client
+py-cpuinfo>=8.0.0