Commit · f4c79df
1 Parent(s): 0a7cc32
Add application file
app.py
ADDED
@@ -0,0 +1,122 @@
import gradio as gr
import openvino_genai
import librosa
import numpy as np
from threading import Thread, Lock, Event
from scipy.ndimage import uniform_filter1d
from queue import Queue, Empty


# Initialize Mistral pipeline
mistral_pipe = openvino_genai.LLMPipeline("mistral-ov", device="CPU")
config = openvino_genai.GenerationConfig(
    max_new_tokens=100,   # Increased for better context
    num_beams=1,          # Keep greedy search for speed
    do_sample=False,      # Disable sampling for faster generation
    temperature=0.0,      # Set to 0 since sampling is disabled
    top_p=1.0,            # Disable top-p filtering
    top_k=50
)
pipe_lock = Lock()

# Initialize Whisper pipeline
whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")

def process_audio(data, sr):
    """Audio processing with silence trimming"""
    data = librosa.to_mono(data.T) if data.ndim > 1 else data
    data = data.astype(np.float32)
    data /= np.max(np.abs(data))

    # Voice activity detection
    frame_length, hop_length = 2048, 512
    rms = librosa.feature.rms(y=data, frame_length=frame_length, hop_length=hop_length)[0]
    smoothed_rms = uniform_filter1d(rms, size=5)
    speech_frames = np.where(smoothed_rms > 0.025)[0]

    if not speech_frames.size:
        return None

    start = max(0, int(speech_frames[0] * hop_length - 0.1 * sr))
    end = min(len(data), int((speech_frames[-1] + 1) * hop_length + 0.1 * sr))
    return data[start:end]

def transcribe(audio):
    """Audio to text transcription"""
    sr, data = audio
    processed = process_audio(data, sr)
    if processed is None or len(processed) < 1600:
        return ""

    if sr != 16000:
        processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)

    return whisper_pipe.generate(processed)

def stream_generator(message, history):
    """Original Mistral streaming function (unchanged)"""
    response_queue = Queue()
    completion_event = Event()
    error_message = [None]

    def callback(token):
        response_queue.put(token)
        return openvino_genai.StreamingStatus.RUNNING

    def generate():
        try:
            with pipe_lock:
                mistral_pipe.generate(message, config, callback)
        except Exception as e:
            error_message[0] = str(e)
        finally:
            completion_event.set()

    Thread(target=generate, daemon=True).start()

    accumulated = []
    while not completion_event.is_set() or not response_queue.empty():
        if error_message[0]:
            yield f"Error: {error_message[0]}"
            return

        try:
            token = response_queue.get_nowait()
            accumulated.append(token)
            yield "".join(accumulated)
        except Empty:
            continue

    yield "".join(accumulated)

# Create interface with added voice input
with gr.Blocks() as demo:
    # Original chat interface
    chat_interface = gr.ChatInterface(
        stream_generator,
        textbox=gr.Textbox(placeholder="Ask Mistral...", container=False),
        title="EDU CHAT BY PHANINDRA REDDY K",
        examples=[
            "Explain quantum physics simply",
            "Write a haiku about technology",
            "What's the meaning of life?"
        ],
        cache_examples=False,
    )

    # Add voice input below examples
    with gr.Row():
        audio = gr.Audio(sources=["microphone"], type="numpy", label="Voice Input")
        transcribe_btn = gr.Button("Send Transcription")

    # Connect transcription to chat input
    transcribe_btn.click(
        transcribe,
        inputs=audio,
        outputs=chat_interface.textbox
    )

if __name__ == "__main__":
    demo.launch(share=True, debug=True)
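
The token streaming in stream_generator follows a standard producer/consumer pattern: a background thread pushes tokens from the pipeline callback into a Queue while the Gradio generator drains the queue and yields growing partial responses. Below is a minimal sketch of that pattern in isolation, using a hypothetical fake_generate in place of mistral_pipe.generate so the control flow can be exercised without any model files; names here are illustrative, not part of the committed app.

# Sketch only: stand-in token source, same thread + Queue streaming shape as app.py.
from queue import Queue, Empty
from threading import Event, Thread
import time


def fake_generate(prompt, callback):
    # Stand-in for mistral_pipe.generate(): emits a few tokens with a small delay.
    for token in ["Hello", " ", "world", "!"]:
        time.sleep(0.05)
        callback(token)


def stream(prompt):
    queue, done = Queue(), Event()

    def worker():
        try:
            fake_generate(prompt, queue.put)
        finally:
            done.set()  # Signal completion even if the producer raised.

    Thread(target=worker, daemon=True).start()

    accumulated = []
    # Drain the queue until the worker has finished and nothing is left to read.
    while not done.is_set() or not queue.empty():
        try:
            accumulated.append(queue.get(timeout=0.1))
            yield "".join(accumulated)
        except Empty:
            continue


if __name__ == "__main__":
    for partial in stream("demo"):
        print(partial)

Using queue.get(timeout=...) instead of get_nowait() avoids the busy loop in the original consumer; both approaches work, the timeout variant simply burns fewer cycles while waiting for the next token.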