chatbox / app.py
from threading import Thread, Lock, Event
from queue import Queue, Empty

import gradio as gr
import librosa
import numpy as np
import openvino_genai
from huggingface_hub import snapshot_download
from scipy.ndimage import uniform_filter1d

# Download the OpenVINO-optimized models from Hugging Face into local folders.
# snapshot_download reuses already-downloaded files, so restarts are cheap.
snapshot_download(
    repo_id="OpenVINO/Mistral-7B-Instruct-v0.2-int4-ov",
    local_dir="mistral-ov",
)
snapshot_download(
    repo_id="OpenVINO/whisper-tiny-fp16-ov",
    local_dir="whisper-ov-model",
)
# Initialize the Mistral chat pipeline on CPU.
mistral_pipe = openvino_genai.LLMPipeline("mistral-ov", device="CPU")
config = openvino_genai.GenerationConfig(
    max_new_tokens=100,
    num_beams=1,
    do_sample=False,  # greedy decoding: temperature/top_p/top_k have no effect here
    temperature=0.0,
    top_p=1.0,
    top_k=50,
)
# LLMPipeline is not thread-safe; serialize generate() calls across concurrent requests.
pipe_lock = Lock()
# Initialize Whisper pipeline
whisper_pipe = openvino_genai.WhisperPipeline("whisper-ov-model", device="CPU")
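# WhisperPipeline.generate() takes raw float32 PCM sampled at 16 kHz and returns
# the decoded transcription; transcribe() below prepares the audio accordingly.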
def process_audio(data, sr):
    """Normalize audio and trim leading/trailing silence."""
    data = librosa.to_mono(data.T) if data.ndim > 1 else data
    data = data.astype(np.float32)
    peak = np.max(np.abs(data))
    if peak == 0:
        return None  # all-zero buffer: nothing to transcribe
    data /= peak
    # Energy-based voice activity detection on smoothed RMS frames.
    frame_length, hop_length = 2048, 512
    rms = librosa.feature.rms(y=data, frame_length=frame_length, hop_length=hop_length)[0]
    smoothed_rms = uniform_filter1d(rms, size=5)
    speech_frames = np.where(smoothed_rms > 0.025)[0]
    if not speech_frames.size:
        return None
    # Keep ~0.1 s of padding on each side of the detected speech region.
    start = max(0, int(speech_frames[0] * hop_length - 0.1 * sr))
    end = min(len(data), int((speech_frames[-1] + 1) * hop_length + 0.1 * sr))
    return data[start:end]
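# Illustrative sanity check (not part of the app): a steady 440 Hz tone is loud
# enough to pass the 0.025 RMS threshold, so it survives trimming:
#   tone = np.sin(2 * np.pi * 440 * np.linspace(0, 1, 48000)).astype(np.float32)
#   assert process_audio(tone, 48000) is not None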
def transcribe(audio):
    """Transcribe recorded audio to text."""
    if audio is None:
        return ""
    sr, data = audio
    processed = process_audio(data, sr)
    if processed is None or len(processed) < 1600:  # skip near-empty clips
        return ""
    # Whisper expects 16 kHz mono input.
    if sr != 16000:
        processed = librosa.resample(processed, orig_sr=sr, target_sr=16000)
    # str() converts the decoded result to plain text for the Gradio textbox.
    return str(whisper_pipe.generate(processed))
def stream_generator(message, history):
    """Stream tokens from Mistral via a background thread and a queue."""
    response_queue = Queue()
    completion_event = Event()
    error_message = [None]

    def callback(token):
        response_queue.put(token)
        return openvino_genai.StreamingStatus.RUNNING

    def generate():
        try:
            with pipe_lock:
                mistral_pipe.generate(message, config, callback)
        except Exception as e:
            error_message[0] = str(e)
        finally:
            completion_event.set()

    Thread(target=generate, daemon=True).start()

    accumulated = []
    while not completion_event.is_set() or not response_queue.empty():
        if error_message[0]:
            yield f"Error: {error_message[0]}"
            return
        try:
            # Block briefly instead of spinning on get_nowait().
            token = response_queue.get(timeout=0.05)
            accumulated.append(token)
            yield "".join(accumulated)
        except Empty:
            continue
    yield "".join(accumulated)
with gr.Blocks() as demo:
    chat_interface = gr.ChatInterface(
        stream_generator,
        textbox=gr.Textbox(placeholder="Ask Mistral...", container=False),
        title="EDU CHAT BY PHANINDRA REDDY K",
        examples=[
            "Explain quantum physics simply",
            "Write a haiku about technology",
            "What's the meaning of life?",
        ],
        cache_examples=False,
    )
    with gr.Row():
        audio = gr.Audio(sources=["microphone"], type="numpy", label="Voice Input")
        transcribe_btn = gr.Button("Send Transcription")
    # Place the transcription in the chat textbox; the user then sends it manually.
    transcribe_btn.click(
        transcribe,
        inputs=audio,
        outputs=chat_interface.textbox,
    )
if __name__ == "__main__":
    demo.launch(share=True, debug=True)
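# Run with `python app.py`. share=True additionally publishes a temporary public
# *.gradio.live URL; debug=True keeps the server in the foreground with full logs.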